diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 14e1653..2698f9b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,19 +14,77 @@ concurrency: cancel-in-progress: true jobs: - helm-lint: + changes: + runs-on: "ubuntu-latest" + timeout-minutes: 2 + outputs: + infrahub: ${{ steps.filter.outputs.infrahub }} + enterprise: ${{ steps.filter.outputs.enterprise }} + observability: ${{ steps.filter.outputs.observability }} + steps: + - uses: "actions/checkout@v4" + - id: filter + uses: "dorny/paths-filter@v3" + with: + filters: | + infrahub: + - 'charts/infrahub/**' + enterprise: + - 'charts/infrahub-enterprise/**' + - 'charts/infrahub/**' + observability: + - 'charts/infrahub-observability/**' + - 'scripts/sync-dashboards.sh' + - 'scripts/transform_dashboard.py' + - 'scripts/validate_dashboards.py' + - 'scripts/known-metrics.yaml' + + helm-lint-infrahub: + needs: changes + if: ${{ needs.changes.outputs.infrahub == 'true' }} runs-on: "ubuntu-latest" timeout-minutes: 5 steps: - - name: "Check out repository code" - uses: "actions/checkout@v4" + - uses: "actions/checkout@v4" with: submodules: true - - name: "Install Helm" - uses: azure/setup-helm@v4.3.0 + - uses: "azure/setup-helm@v4.3.0" - name: "Updating dependencies" run: "helm dependency update charts/infrahub" - name: "Linting: helm lint infrahub" run: "helm lint charts/infrahub" - - name: "Linting: helm lint infrahub enterprise" + + helm-lint-enterprise: + needs: changes + if: ${{ needs.changes.outputs.enterprise == 'true' }} + runs-on: "ubuntu-latest" + timeout-minutes: 5 + steps: + - uses: "actions/checkout@v4" + with: + submodules: true + - uses: "azure/setup-helm@v4.3.0" + - name: "Linting: helm lint infrahub-enterprise" run: "helm lint charts/infrahub-enterprise" + + helm-lint-observability: + needs: changes + if: ${{ needs.changes.outputs.observability == 'true' }} + runs-on: "ubuntu-latest" + timeout-minutes: 10 + steps: + - uses: "actions/checkout@v4" + with: + 
submodules: true + - uses: "azure/setup-helm@v4.3.0" + - uses: "actions/setup-python@v5" + with: + python-version: "3.12" + - name: "Install Python deps" + run: "pip install pyyaml" + - name: "Updating dependencies: infrahub-observability" + run: "helm dependency update charts/infrahub-observability" + - name: "Linting: helm lint infrahub-observability" + run: "helm lint charts/infrahub-observability" + - name: "Static-validate dashboard queries against known-metrics allowlist" + run: "python3 scripts/validate_dashboards.py" diff --git a/.github/workflows/dashboard-drift-check.yml b/.github/workflows/dashboard-drift-check.yml new file mode 100644 index 0000000..93d6e85 --- /dev/null +++ b/.github/workflows/dashboard-drift-check.yml @@ -0,0 +1,80 @@ +--- +# yamllint disable rule:line-length +# +# Scheduled re-sync of upstream Grafana dashboards. If upstream changed the +# JSON at the recorded ref (or the transform script's output changes), this +# workflow opens (or updates) a draft PR with the re-synced dashboards so a +# human can review the diff. If there's no drift, the workflow is a no-op. 
+# +# Trigger options: +# - Scheduled: runs Mondays at 09:00 UTC +# - Manual: workflow_dispatch with an optional REF input +name: "Dashboard drift check" +on: + schedule: + - cron: "0 9 * * 1" + workflow_dispatch: + inputs: + ref: + description: "Upstream ref to sync against (default: ref recorded in .dashboards-source)" + required: false + type: string + +jobs: + drift-check: + runs-on: "ubuntu-latest" + timeout-minutes: 10 + permissions: + contents: write + pull-requests: write + steps: + - uses: "actions/checkout@v4" + - name: "Install yq" + run: | + sudo wget -q -O /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 + sudo chmod +x /usr/local/bin/yq + - name: "Re-sync dashboards" + env: + # Hand the dispatch input to the script through the environment so its + # contents are never spliced into (and parsed as) shell source. + SYNC_REF: "${{ inputs.ref }}" + run: | + ./scripts/sync-dashboards.sh ${SYNC_REF:+"$SYNC_REF"} + - name: "Validate the synced dashboards" + run: | + python3 -m pip install pyyaml + python3 scripts/validate_dashboards.py + - name: "Open or update drift PR" + uses: "peter-evans/create-pull-request@v7" + with: + # The branch name is fixed so re-runs update the same PR rather + # than spawning a new one each week. + branch: "automation/dashboard-drift" + base: "${{ github.event.repository.default_branch }}" + title: "chore(observability): sync upstream dashboards" + commit-message: | + chore(observability): sync upstream dashboards + + Auto-generated by .github/workflows/dashboard-drift-check.yml. + Re-ran scripts/sync-dashboards.sh and committed any drift. + body: | + Automated drift detection re-ran `scripts/sync-dashboards.sh` + against the ref recorded in `.dashboards-source` + (`${{ inputs.ref || 'default' }}`) and found changes. + + **Review checklist** + + - [ ] Look at the rendered diff — are upstream's edits sensible? + - [ ] Confirm the transform pipeline still produces clean output + (no leftover `container_label_com_docker_compose_*` tokens). 
+ - [ ] Spot-check at least one panel in Grafana against a live + cluster before merging. + - [ ] If upstream renamed a metric we no longer collect, update + `scripts/known-metrics.yaml`. + + Generated by `peter-evans/create-pull-request@v7`. + draft: true + labels: | + automation + observability + delete-branch: true diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e9271ab --- /dev/null +++ b/Makefile @@ -0,0 +1,29 @@ +.PHONY: help sync-dashboards lint lint-observability template-observability deps-observability + +help: + @echo "Available targets:" + @echo " sync-dashboards - sync vendored dashboards from upstream infrahub (REF= to override)" + @echo " deps-observability - run 'helm dependency update' for the observability chart" + @echo " lint-observability - run 'helm lint' for the observability chart" + @echo " template-observability - render the observability chart with default values" + @echo " lint - lint every chart in charts/" + +sync-dashboards: + ./scripts/sync-dashboards.sh $(REF) + +deps-observability: + helm dependency update charts/infrahub-observability + +lint-observability: deps-observability + helm lint charts/infrahub-observability + +template-observability: deps-observability + helm template test charts/infrahub-observability + +# Abort on the first chart whose dependency update or lint fails; without the explicit exits only the last chart's status would reach make. +lint: + @for chart in charts/*/; do \ + echo "==> linting $$chart"; \ + helm dependency update "$$chart" >/dev/null || exit 1; \ + helm lint "$$chart" || exit 1; \ + done diff --git a/charts/infrahub-observability/.dashboards-source b/charts/infrahub-observability/.dashboards-source new file mode 100644 index 0000000..a46f5d6 --- /dev/null +++ b/charts/infrahub-observability/.dashboards-source @@ -0,0 +1,14 @@ +# Tracks the upstream source for vendored Grafana dashboards. +# Updated by scripts/sync-dashboards.sh and reviewed in PRs. +# The chart's appVersion should match the infrahub release recorded here. 
+repo: opsmill/infrahub +ref: infrahub-v1.9.3 +path: development/grafana/provisioning/dashboards +files: + - infrahub_monitoring.json + - neo4j_monitoring.json + - rabbitmq_instance_monitoring.json + - container_resources.json + - loki_monitoring.json + - prefect_platform_overview.json + - prefect_flow_run_overview.json diff --git a/charts/infrahub-observability/.helmignore b/charts/infrahub-observability/.helmignore new file mode 100644 index 0000000..0e8a0eb --- /dev/null +++ b/charts/infrahub-observability/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/infrahub-observability/Chart.yaml b/charts/infrahub-observability/Chart.yaml new file mode 100644 index 0000000..ece51e1 --- /dev/null +++ b/charts/infrahub-observability/Chart.yaml @@ -0,0 +1,50 @@ +--- +apiVersion: v2 +name: infrahub-observability +description: Observability stack (Alloy, Loki, Prometheus, Tempo, Grafana) for Infrahub on Kubernetes +icon: https://github.com/opsmill/infrahub/raw/develop/frontend/app/public/favicons/logo512.png +home: https://github.com/opsmill/infrahub-helm +sources: + - https://github.com/opsmill/infrahub + - https://github.com/opsmill/infrahub-helm +keywords: + - infrahub + - observability + - grafana + - loki + - prometheus + - tempo + - alloy + - kubernetes +maintainers: + - name: OpsMill + url: https://github.com/opsmill +type: application +version: 0.1.0 +appVersion: "1.9.3" + +dependencies: + - name: alloy + version: "1.0.3" + repository: "https://grafana.github.io/helm-charts" + condition: alloy.enabled + - name: loki + version: "6.16.0" + repository: "https://grafana.github.io/helm-charts" + condition: 
loki.enabled + - name: tempo + version: "1.10.0" + repository: "https://grafana.github.io/helm-charts" + condition: tempo.enabled + - name: grafana + version: "8.5.0" + repository: "https://grafana.github.io/helm-charts" + condition: grafana.enabled + - name: prometheus + version: "25.27.0" + repository: "https://prometheus-community.github.io/helm-charts" + condition: prometheus.enabled + - name: prometheus-node-exporter + version: "4.36.0" + repository: "https://prometheus-community.github.io/helm-charts" + condition: prometheus-node-exporter.enabled diff --git a/charts/infrahub-observability/README.md b/charts/infrahub-observability/README.md new file mode 100644 index 0000000..e99a627 --- /dev/null +++ b/charts/infrahub-observability/README.md @@ -0,0 +1,112 @@ +# infrahub-observability + +Observability stack (Alloy, Loki, Prometheus, Tempo, Grafana) for Infrahub on Kubernetes + +**Homepage:** <https://github.com/opsmill/infrahub-helm> + +This chart deploys the same observability stack that Infrahub ships for local +Docker Compose development — Grafana Alloy (logs + metrics), Loki (logs), +Prometheus (metrics + remote-write receiver), Tempo (traces), Grafana +(visualization), and the Prefect prometheus exporter — onto Kubernetes. It is +designed to be installed alongside the [infrahub](../infrahub) or +[infrahub-enterprise](../infrahub-enterprise) chart in the same namespace. 
+ +## Prerequisites + +- Kubernetes 1.24+ +- Helm 3.0+ +- PV provisioner support in the underlying infrastructure (Loki, Prometheus, + Tempo and Grafana enable persistence by default) +- The infrahub chart is installed in the same namespace, or its release + name is supplied via `global.infrahubReleaseName` + +## Installing the Chart + +```sh +helm dependency update charts/infrahub-observability +helm install obs charts/infrahub-observability -n infrahub +``` + +## Wiring infrahub to send traces to Tempo + +The infrahub chart exposes a `global.tracing` block that emits the +`INFRAHUB_TRACE_*` env vars on the server and task-worker deployments. Point +it at the Tempo service this chart creates: + +```yaml +# infrahub values +global: + tracing: + enabled: true + endpoint: "obs-tempo:4317" # <release-name>-tempo:4317 (host:port for grpc) + protocol: grpc + insecure: true +``` + +## Dashboards + +Seven Grafana dashboards are vendored from the [opsmill/infrahub +repository](https://github.com/opsmill/infrahub) at the version recorded in +`.dashboards-source`. The chart's `appVersion` tracks this version. The +dashboards are kept in sync with upstream automatically as part of the +infrahub release flow; for local development: + +```sh +make sync-dashboards REF=infrahub-v1.9.3 +``` + +## Uninstalling the Chart + +```sh +helm delete obs -n infrahub +``` + +Persistent volumes for Loki, Prometheus, Tempo and Grafana are retained by +default. Delete the PVCs explicitly if you want a clean slate. 
+ +## Maintainers + +| Name | Email | Url | +| ---- | ------ | --- | +| OpsMill | | <https://github.com/opsmill> | + +## Requirements + +| Repository | Name | Version | +|------------|------|---------| +| https://grafana.github.io/helm-charts | alloy | 1.0.3 | +| https://grafana.github.io/helm-charts | grafana | 8.5.0 | +| https://grafana.github.io/helm-charts | loki | 6.16.0 | +| https://grafana.github.io/helm-charts | tempo | 1.10.0 | +| https://prometheus-community.github.io/helm-charts | prometheus | 25.27.0 | +| https://prometheus-community.github.io/helm-charts | prometheus-node-exporter | 4.36.0 | + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| alloy | object | `{"alloy":{"clustering":{"enabled":false},"configMap":{"create":false,"key":"config.alloy","name":""},"mounts":{"dockercontainers":false,"varlog":true}},"cadvisor":{"enabled":true},"controller":{"type":"daemonset"},"enabled":true}` | -------------------------------------------------------------------------- | +| alloy.alloy.configMap.name | string | `""` | Name of the ConfigMap that holds Alloy's config.alloy file. Resolved at render time via the helper. | +| alloy.cadvisor | object | `{"enabled":true}` | Scrape kubelet cAdvisor for per-container CPU/memory/network/fs metrics. Requires the Alloy ServiceAccount to have `get nodes/proxy`, which the subchart's default RBAC already grants. Disable if your cluster's policy forbids that permission; the Container Resources and Neo4j Monitoring dashboards will then show no data. | +| global | object | `{"commonAnnotations":{},"commonLabels":{},"imagePullPolicy":"IfNotPresent","imagePullSecrets":[],"infrahubNamespace":"","infrahubReleaseName":"infrahub","kubernetesClusterDomain":"cluster.local","podLabels":{}}` | Global values shared across all sub-charts and templates in this chart. | +| global.commonAnnotations | object | `{}` | Annotations added to every resource managed by this chart. 
| +| global.commonLabels | object | `{}` | Labels added to every resource managed by this chart. | +| global.imagePullPolicy | string | `"IfNotPresent"` | Default imagePullPolicy for in-chart workloads (currently only the Prefect exporter). | +| global.imagePullSecrets | list | `[]` | Image pull secrets propagated to in-chart workloads. | +| global.infrahubNamespace | string | `""` | Namespace where the sibling infrahub release lives. Empty string means the same namespace as this release. | +| global.infrahubReleaseName | string | `"infrahub"` | Release name of the sibling infrahub chart. Used by the Prefect exporter to derive the default PREFECT_API_URL and by Alloy when scoping discovery. | +| global.kubernetesClusterDomain | string | `"cluster.local"` | Cluster DNS domain. Used for fully-qualified service names if needed. | +| global.podLabels | object | `{}` | Pod-level labels merged into the standard selector labels. | +| grafana | object | `{"adminPassword":"admin","adminUser":"admin","enabled":true,"env":{"GF_LOG_LEVEL":"warn","GF_USERS_ALLOW_SIGN_UP":"false"},"ingress":{"enabled":false},"persistence":{"enabled":true,"size":"5Gi"},"service":{"type":"ClusterIP"},"sidecar":{"dashboards":{"enabled":true,"label":"grafana_dashboard","labelValue":"1","searchNamespace":"ALL"},"datasources":{"enabled":true,"label":"grafana_datasource","labelValue":"1","searchNamespace":"ALL"}}}` | -------------------------------------------------------------------------- | +| grafana.adminPassword | string | `"admin"` | Default password matches docker-compose dev parity. Override via `grafana.admin.existingSecret` in production. 
| +| loki | object | `{"backend":{"replicas":0},"chunksCache":{"enabled":false},"deploymentMode":"SingleBinary","enabled":true,"gateway":{"enabled":false},"loki":{"auth_enabled":false,"commonConfig":{"replication_factor":1},"compactor":{"compaction_interval":"10m","delete_request_store":"filesystem","retention_delete_delay":"2h","retention_delete_worker_count":100,"retention_enabled":true,"working_directory":"/var/loki/compactor"},"limits_config":{"allow_structured_metadata":true,"cardinality_limit":100000,"ingestion_burst_size_mb":64,"ingestion_rate_mb":32,"max_entries_limit_per_query":10000,"max_global_streams_per_user":15000,"max_query_lookback":"24h","max_streams_per_user":20000,"per_stream_rate_limit":"3MB","per_stream_rate_limit_burst":"5MB","reject_old_samples":true,"reject_old_samples_max_age":"168h","retention_period":"24h"},"schemaConfig":{"configs":[{"from":"2024-04-01","index":{"period":"24h","prefix":"loki_index_"},"object_store":"filesystem","schema":"v13","store":"tsdb"}]},"server":{"log_level":"warn"},"storage":{"type":"filesystem"}},"lokiCanary":{"enabled":false},"read":{"replicas":0},"resultsCache":{"enabled":false},"singleBinary":{"persistence":{"enabled":true,"size":"10Gi"},"replicas":1},"test":{"enabled":false},"write":{"replicas":0}}` | -------------------------------------------------------------------------- | +| prefectExporter | object | `{"affinity":{},"enabled":true,"image":{"pullPolicy":"","repository":"prefecthq/prometheus-prefect-exporter","tag":"3.3.0"},"logLevel":"WARNING","nodeSelector":{},"podAnnotations":{},"prefectApiUrl":"","replicas":1,"resources":{},"securityContext":{"runAsNonRoot":true,"runAsUser":1000},"service":{"port":8000,"type":"ClusterIP"},"tolerations":[]}` | -------------------------------------------------------------------------- | +| prefectExporter.enabled | bool | `true` | Enable the Prefect prometheus exporter sidecar Deployment. 
| +| prefectExporter.logLevel | string | `"WARNING"` | Log level passed to the exporter. | +| prefectExporter.prefectApiUrl | string | `""` | PREFECT_API_URL. Empty string defaults to the task-manager service of the sibling infrahub release (see _helpers.tpl). | +| prometheus | object | `{"alertmanager":{"enabled":false},"enabled":true,"kube-state-metrics":{"enabled":false},"prometheus-node-exporter":{"enabled":false},"prometheus-pushgateway":{"enabled":false},"server":{"extraArgs":{"log.level":"warn","web.enable-remote-write-receiver":""},"persistentVolume":{"enabled":true,"size":"20Gi"},"retention":"96h"},"serverFiles":{"prometheus.yml":{"scrape_configs":[]}}}` | -------------------------------------------------------------------------- | +| prometheus-node-exporter | object | `{"enabled":true}` | -------------------------------------------------------------------------- | +| tempo | object | `{"enabled":true,"persistence":{"enabled":true,"size":"10Gi"},"tempo":{"metricsGenerator":{"enabled":false},"receivers":{"otlp":{"protocols":{"grpc":{"endpoint":"0.0.0.0:4317"},"http":{}}}},"retention":"96h"}}` | -------------------------------------------------------------------------- | + +---------------------------------------------- +Autogenerated from chart metadata using [helm-docs v1.14.2](https://github.com/norwoodj/helm-docs/releases/v1.14.2) diff --git a/charts/infrahub-observability/README.md.gotmpl b/charts/infrahub-observability/README.md.gotmpl new file mode 100644 index 0000000..7902c65 --- /dev/null +++ b/charts/infrahub-observability/README.md.gotmpl @@ -0,0 +1,72 @@ +{{ template "chart.header" . }} +{{ template "chart.description" . }} + +{{ template "chart.homepageLine" . 
}} + +This chart deploys the same observability stack that Infrahub ships for local +Docker Compose development — Grafana Alloy (logs + metrics), Loki (logs), +Prometheus (metrics + remote-write receiver), Tempo (traces), Grafana +(visualization), and the Prefect prometheus exporter — onto Kubernetes. It is +designed to be installed alongside the [infrahub](../infrahub) or +[infrahub-enterprise](../infrahub-enterprise) chart in the same namespace. + +## Prerequisites + +- Kubernetes 1.24+ +- Helm 3.0+ +- PV provisioner support in the underlying infrastructure (Loki, Prometheus, + Tempo and Grafana enable persistence by default) +- The infrahub chart is installed in the same namespace, or its release + name is supplied via `global.infrahubReleaseName` + +## Installing the Chart + +```sh +helm dependency update charts/infrahub-observability +helm install obs charts/infrahub-observability -n infrahub +``` + +## Wiring infrahub to send traces to Tempo + +The infrahub chart exposes a `global.tracing` block that emits the +`INFRAHUB_TRACE_*` env vars on the server and task-worker deployments. Point +it at the Tempo service this chart creates: + +```yaml +# infrahub values +global: + tracing: + enabled: true + endpoint: "obs-tempo:4317" # <release-name>-tempo:4317 (host:port for grpc) + protocol: grpc + insecure: true +``` + +## Dashboards + +Seven Grafana dashboards are vendored from the [opsmill/infrahub +repository](https://github.com/opsmill/infrahub) at the version recorded in +`.dashboards-source`. The chart's `appVersion` tracks this version. The +dashboards are kept in sync with upstream automatically as part of the +infrahub release flow; for local development: + +```sh +make sync-dashboards REF=infrahub-v1.9.3 +``` + +## Uninstalling the Chart + +```sh +helm delete obs -n infrahub +``` + +Persistent volumes for Loki, Prometheus, Tempo and Grafana are retained by +default. Delete the PVCs explicitly if you want a clean slate. + +{{ template "chart.maintainersSection" . 
}} + +{{ template "chart.requirementsSection" . }} + +{{ template "chart.valuesSection" . }} + +{{ template "helm-docs.versionFooter" . }} diff --git a/charts/infrahub-observability/dashboards/container_resources.json b/charts/infrahub-observability/dashboards/container_resources.json new file mode 100644 index 0000000..5a2c15b --- /dev/null +++ b/charts/infrahub-observability/dashboards/container_resources.json @@ -0,0 +1,1401 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 4, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2007, + "panels": [], + "repeat": "datasource_prometheus", + "title": "Node Resources", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + 
"overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "Network Traffic" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 2001, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "localprometheus" + }, + "editorMode": "code", + "expr": "sum(rate(container_network_receive_bytes_total{container!=\"\", image!=\"\"}[1m])) by (id)", + "legendFormat": "Network Traffic", + "range": true, + "refId": "A" + } + ], + "title": "Network Traffic on Node", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 2002, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "localprometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(container_last_seen{namespace=\"infrahub\"} > time() - 60)", + "format": 
"time_series", + "instant": true, + "interval": "", + "legendFormat": "{{service}}", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Running Containers", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 2005, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "expr": "node_memory_MemAvailable_bytes", + "legendFormat": "Available Memory", + "refId": "A" + } + ], + "title": "Available Memory on Node", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + 
"defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 2003, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "expr": "100 - (avg by (instance) (irate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)", + "legendFormat": "CPU Usage", + "refId": "A" + } + ], + "title": "CPU Usage on Node", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + 
"pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "IOps" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 1 + }, + "id": 2006, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "expr": "rate(node_disk_io_time_seconds_total[5m])", + "legendFormat": "Disk I/O", + "refId": "A" + } + ], + "title": "Disk I/O on Node", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 2004, + "options": { + "legend": { 
+ "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "expr": "node_filesystem_size_bytes - node_filesystem_avail_bytes", + "legendFormat": "Disk Space", + "refId": "A" + } + ], + "title": "Free & Used Disk Space on Node", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 12, + "panels": [], + "title": "Basic Resources", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 1, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + 
}, + "editorMode": "code", + "expr": "sum(rate(container_cpu_usage_seconds_total{container!=\"\"}[5m])) by (container) * 100", + "legendFormat": "{{container}}", + "range": true, + "refId": "A" + } + ], + "title": "CPU Usage per Container", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "container_memory_usage_bytes{container!=\"\"}", + "legendFormat": "{{container}}", + "range": true, + "refId": "A" + } + ], + "title": "Memory Usage per Container", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 
+ }, + "id": 13, + "panels": [], + "title": "Network Metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(container_network_receive_bytes_total{container!=\"\"}[5m])", + "legendFormat": "{{container}}", + "range": true, + "refId": "A" + } + ], + "title": "Incoming Network Traffic", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + 
"axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(container_network_transmit_bytes_total{container!=\"\"}[5m])", + "legendFormat": "{{container}}", + "range": true, + "refId": "A" + } + ], + "title": "Outgoing Network Traffic", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 14, + "panels": [], + "title": "Storage Metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 20, + 
"gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum (container_fs_usage_bytes{container!=\"\"}) by (container)", + "legendFormat": "{{container}}", + "range": true, + "refId": "A" + } + ], + "title": "Storage Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "iops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 25 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum (\n rate(container_fs_reads_total{container!=\"\"}[5m])\n + rate(container_fs_writes_total{container!=\"\"}[5m])\n) by (container) ", + "legendFormat": "{{container}}", + "range": true, + "refId": "A" + } + ], + "title": "Disk IOPS (Reads + Writes)", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 15, + "panels": [], + "title": "System Metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + 
"thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 34 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "time() - container_start_time_seconds{container!=\"\"}", + "legendFormat": "{{container}}", + "range": true, + "refId": "A" + } + ], + "title": "Container Uptime", + "type": "timeseries" + } + ], + "preload": false, + "refresh": "1m", + "schemaVersion": 40, + "tags": [ + "docker", + "containers", + "monitoring" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus (Local)", + "value": "localprometheus" + }, + "includeAll": false, + "label": "Datasource", + "name": "datasource_prometheus", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": {}, + "timezone": "utc", + "title": "Docker Containers Monitoring", + "uid": "bd01c756-64dd-40e5-ba26-d5c254506598", + "version": 7, + "weekStart": "" +} \ No newline at end of file diff --git a/charts/infrahub-observability/dashboards/infrahub_monitoring.json b/charts/infrahub-observability/dashboards/infrahub_monitoring.json new file mode 100644 index 0000000..2f26b81 --- /dev/null +++ b/charts/infrahub-observability/dashboards/infrahub_monitoring.json @@ -0,0 +1,2527 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & 
Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 2, + "id": 5, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 207, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 200, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(infrahub_requests_total{status_code=~\"2..\"}[5m])) / sum(rate(infrahub_requests_total[5m])) * 100", + "legendFormat": "Success Rate", + "range": true, + "refId": "A" + } + ], + "title": "Service Availability (Success Rate)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": 
{ + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 202, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(infrahub_requests_total{status_code=~\"4..\"}[5m])) / sum(rate(infrahub_requests_total[5m])) * 100", + "interval": "", + "legendFormat": "4XX Error Rate", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(infrahub_requests_total{status_code=~\"5..\"}[5m])) / sum(rate(infrahub_requests_total[5m])) * 100", + "interval": "", + "legendFormat": "5XX Error Rate", + "range": true, + "refId": "B" + } + ], + "title": "Error Rates (4XX & 5XX)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + 
"fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 201, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "avg(rate(infrahub_request_duration_seconds_sum[5m])) / avg(rate(infrahub_request_duration_seconds_count[5m]))", + "interval": "", + "legendFormat": "HTTP Avg Response Time", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "avg(rate(infrahub_graphql_duration_seconds_sum[5m])) / avg(rate(infrahub_graphql_duration_seconds_count[5m]))", + "interval": "", + "legendFormat": "GraphQL Avg Response Time", + "range": true, + "refId": "B" + } + ], + "title": "Average Response Time (HTTP & GraphQL)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": 
"auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 203, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(infrahub_requests_total[5m]))", + "interval": "", + "legendFormat": "Total Requests", + "range": true, + "refId": "A" + } + ], + "title": "Request Volume (Requests per Second)", + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "${datasource_loki}" + }, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 13 + }, + "id": 210, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${datasource_loki}" + }, + "editorMode": "code", + "expr": 
"{container=~\".*(database|server|task|git|cache|queue).*\", container!~\".*run.*\", container=~\"$container\" , level=~\"(?i)WARN.*\"}| line_format \"{{__timestamp__ | date `2006-01-02 15:04:05.00` }}\\t{{.container| replace `infrahub-` ``}}\\t{{.level}}\\t{{.logger}}\\t{{.message}}\"", + "queryType": "range", + "refId": "A" + } + ], + "title": "Warning logs", + "type": "logs" + }, + { + "datasource": { + "type": "loki", + "uid": "${datasource_loki}" + }, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 13 + }, + "id": 209, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${datasource_loki}" + }, + "editorMode": "code", + "expr": "{container=~\".*(database|server|task|git|cache).*\", container!~\".*run.*\", container=~\"$container\" , level=~\"(?i)ERR.*|CRIT|FATAL.*\"}| line_format \"{{__timestamp__ | date `2006-01-02 15:04:05.00` }}\\t{{.container| replace `infrahub-` ``}}\\t{{.level}}\\t{{.logger}}\\t{{.message}}\"", + "queryType": "range", + "refId": "A" + } + ], + "title": "Error/Critical logs", + "type": "logs" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 46, + "panels": [], + "repeat": "app_name", + "title": "Details Requests [$app_name]", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 1, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + 
"fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "dashed" + } + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": " - " + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": false, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 22 + }, + "id": 20, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by(path, method) (infrahub_requests_total{app_name=\"$app_name\", status_code=~\"4.+\", path!=\"/metrics\"}) / sum by(path, method) (infrahub_requests_total{app_name=\"$app_name\", path!=\"/metrics\"}) or vector(0)", + "instant": false, + "interval": "", + "legendFormat": "{{method}} - {{path}}", + "range": true, + "refId": "A" + } + ], + "title": "Percent of 4XX Requests", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, +
"axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 1, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "dashed" + } + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": " - " + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": false, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 22 + }, + "id": 66, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by(path, method) (infrahub_requests_total{app_name=\"$app_name\", status_code=~\"5.+\", path!=\"/metrics\"}) / sum by(path, method) (infrahub_requests_total{app_name=\"$app_name\", path!=\"/metrics\"}) or vector(0)", + "instant": false, + "interval": "", + "legendFormat": "{{method}} - {{path}}", + "range": true, + "refId": "A" + } + ], + "title": "Percent of 5XX Requests", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "stepBefore", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 30 + }, + "id": 106, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile($percentile, sum(rate(infrahub_request_duration_seconds_bucket[1m])) by (le, method, path, status_code))", + "interval": "", + "legendFormat": "{{method}} {{path}} - {{status_code}}", + "range": true, + "refId": "A" + } + ], + "title": "HTTP Request Duration ($percentile)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + 
"barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 30 + }, + "id": 12, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(infrahub_requests_total{app_name=\"$app_name\"}[$__rate_interval])", + "interval": "", + "legendFormat": "{{path}}", + "range": true, + "refId": "A" + } + ], + "title": "Request Per Sec", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": 
{ + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 38 + }, + "id": 113, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "infrahub_requests_in_progress{app_name=\"$app_name\"}", + "interval": "", + "legendFormat": "{{method}}", + "range": true, + "refId": "A" + } + ], + "title": "Requests In Progress", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 46 + }, + "id": 47, + "panels": [], + "repeat": "app_name", + "title": "Details DB Query/GraphQL [$app_name]", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": 
{ + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 47 + }, + "id": 107, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile($percentile, sum(rate(infrahub_graphql_duration_seconds_bucket[1m])) by (le, branch, name, operation, query_id, type))", + "interval": "", + "legendFormat": "{{branch}} - {{name}} - {{operation}}", + "range": true, + "refId": "A" + } + ], + "title": "GraphQL Query Duration ($percentile percentile)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + 
"gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 47 + }, + "id": 103, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile($percentile, sum(rate(infrahub_db_query_execution_seconds_bucket[1m])) by (le, query, type))", + "interval": "", + "legendFormat": "{{query}} - {{type}}", + "range": true, + "refId": "A" + } + ], + "title": "DB Query Execution Time ($percentile)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "localprometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": -1, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "stepAfter", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 8, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 55 + }, + "id": 208, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + 
"placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "editorMode": "code", + "expr": "increase(infrahub_db_query_execution_seconds_count[5m]) - ignoring(le) increase(infrahub_db_query_execution_seconds_bucket{le=\"0.5\"}[5m]) > 0", + "legendFormat": "{{query}} {{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Number Slow Queries (>0.5s)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "bars", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 62 + }, + "id": 104, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile($percentile, 
sum(rate(infrahub_lock_reserved_duration_seconds_bucket[1m])) by (le, lock, type))", + "interval": "", + "legendFormat": "{{lock}} - {{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Lock Reserved Duration ($percentile)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "bars", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 62 + }, + "id": 102, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile($percentile, sum(rate(infrahub_lock_acquire_seconds_bucket[1m])) by (le, lock, type))", + "interval": "", + "legendFormat": "{{lock}} - {{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Lock Acquire Time ($percentile)", + "type": "timeseries" + }, + { + "datasource": { + 
"type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 70 + }, + "id": 108, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile($percentile, sum(rate(infrahub_graphql_response_size_bytes_bucket[1m])) by (le, branch, name, operation, query_id, type))", + "interval": "", + "legendFormat": "{{branch}} - {{name}} - {{operation}}", + "range": true, + "refId": "A" + } + ], + "title": "GraphQL Response Size ($percentile)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + 
"axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 70 + }, + "id": 112, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile($percentile, sum(rate(infrahub_graphql_query_objects_bucket[1m])) by (le, branch, name, operation, query_id, type))", + "interval": "", + "legendFormat": "{{branch}} - {{name}} - {{operation}}", + "range": true, + "refId": "A" + } + ], + "title": "GraphQL Query Objects ($percentile)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + 
"tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 78 + }, + "id": 105, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile($percentile, sum(rate(infrahub_graphql_generate_schema_bucket[1m])) by (le, branch))", + "interval": "", + "legendFormat": "{{branch}}", + "range": true, + "refId": "A" + } + ], + "title": "GraphQL Schema Generation Time ($percentile)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 78 + }, + "id": 110, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile($percentile, sum(rate(infrahub_graphql_query_height_bucket[1m])) by (le, branch, name, operation, query_id, type))", + "interval": "", + "legendFormat": "{{branch}} - {{name}} - {{operation}}", + "range": true, + "refId": "A" + } + ], + "title": "GraphQL Query Height ($percentile)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, 
+ "x": 0, + "y": 86 + }, + "id": 109, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile($percentile, sum(rate(infrahub_graphql_query_depth_bucket[1m])) by (le, branch, name, operation, query_id, type))", + "interval": "", + "legendFormat": "{{branch}} - {{name}} - {{operation}}", + "range": true, + "refId": "A" + } + ], + "title": "GraphQL Query Depth ($percentile)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 86 + }, + "id": 111, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + 
"tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile($percentile, sum(rate(infrahub_graphql_top_level_queries_bucket[1m])) by (le, branch, name, operation, query_id, type))", + "interval": "", + "legendFormat": "{{branch}} - {{name}} - {{operation}}", + "range": true, + "refId": "A" + } + ], + "title": "GraphQL Top Level Queries ($percentile)", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 94 + }, + "id": 23, + "panels": [], + "repeat": "app_name", + "title": "Logs [$app_name]", + "type": "row" + }, + { + "datasource": { + "type": "loki", + "uid": "${datasource_loki}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "bars", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "dashed" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 17, + "w": 6, + "x": 0, + "y": 95 + }, + "id": 14, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + 
"targets": [ + { + "datasource": { + "type": "loki", + "uid": "${datasource_loki}" + }, + "editorMode": "code", + "expr": "sum by (container, level) (rate({container=~\".*(queue|database|server|git|task|cache).*\", container!~\".*run.*\", container=~\"$container\"} | label_format container=`{{.container| replace \"infrahub-\" \"\"}}`| label_format level=`{{.level|lower}}` | level != \"\" [1m]))", + "hide": false, + "legendFormat": "{{ container }} - {{level}}", + "queryType": "range", + "refId": "A" + } + ], + "title": "Logs Type Rate (per Container and Level)", + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "${datasource_loki}" + }, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 17, + "w": 18, + "x": 6, + "y": 95 + }, + "id": 2, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${datasource_loki}" + }, + "editorMode": "code", + "expr": "{container=~\".*(queue|database|server|git|task|cache).*\", container!~\".*run.*\", container=~\"$container\"} | line_format \"{{__timestamp__ | date `2006-01-02 15:04:05.00` }}\\t{{.container| replace `infrahub-` ``}}\\t{{.level}}\\t{{.logger}}\\t{{.message}}\"", + "hide": false, + "legendFormat": "", + "queryType": "range", + "refId": "A" + } + ], + "title": "Logs", + "type": "logs" + } + ], + "preload": false, + "refresh": "", + "schemaVersion": 40, + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus (Local)", + "value": "localprometheus" + }, + "includeAll": false, + "label": "Prometheus", + "name": "datasource_prometheus", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "current": { + "text": "Loki 
(Local)", + "value": "localloki" + }, + "includeAll": false, + "label": "Loki", + "name": "datasource_loki", + "options": [], + "query": "loki", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "current": { + "text": "All", + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "definition": "label_values(app_name)", + "includeAll": true, + "label": "Application", + "multi": true, + "name": "app_name", + "options": [], + "query": { + "query": "label_values(app_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": ".*infrahub.*", + "type": "query" + }, + { + "current": { + "text": "All", + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "loki", + "uid": "${datasource_loki}" + }, + "definition": "", + "includeAll": true, + "label": "Container", + "multi": true, + "name": "container", + "options": [], + "query": { + "label": "container", + "refId": "LokiVariableQueryEditor-VariableQuery", + "stream": "", + "type": 1 + }, + "refresh": 1, + "regex": "^infrahub.+", + "sort": 1, + "type": "query" + }, + { + "current": { + "text": "All", + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "loki", + "uid": "${datasource_loki}" + }, + "definition": "", + "hide": 2, + "includeAll": true, + "label": "Loki Job", + "multi": true, + "name": "job_loki", + "options": [], + "query": { + "label": "job", + "refId": "LokiVariableQueryEditor-VariableQuery", + "stream": "", + "type": 1 + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "definition": "label_values(job)", + "hide": 2, + "includeAll": true, + "label": "Job Prometheus", + "multi": true, + "name": "job_prometheus", + "options": [], + "query": { + "query": "label_values(job)", + "refId": 
"PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "0.95", + "value": "0.95" + }, + "includeAll": false, + "label": "percentile", + "name": "percentile", + "options": [ + { + "selected": false, + "text": "p99", + "value": "0.99" + }, + { + "selected": true, + "text": "p95", + "value": "0.95" + }, + { + "selected": false, + "text": "p90", + "value": "0.90" + } + ], + "query": "p99 : 0.99 , p95 : 0.95 , p90 : 0.90", + "type": "custom" + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": {}, + "timezone": "utc", + "title": "Infrahub Monitoring", + "uid": "Kn5xm-gZ3", + "version": 55, + "weekStart": "" +} \ No newline at end of file diff --git a/charts/infrahub-observability/dashboards/loki_monitoring.json b/charts/infrahub-observability/dashboards/loki_monitoring.json new file mode 100644 index 0000000..66e1710 --- /dev/null +++ b/charts/infrahub-observability/dashboards/loki_monitoring.json @@ -0,0 +1,13898 @@ +{ + "annotations": { + "list": [ + { + "$$hashKey": "object:7", + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 2, + "id": 7, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 102, + "panels": [], + "title": "Loki General Info", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + 
"unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 0, + "y": 1 + }, + "id": 59, + "interval": "$interval", + "maxDataPoints": 1, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^version$/", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "loki_build_info{job=~\"$job\",instance=~\"$instance\"}", + "format": "table", + "instant": false, + "interval": "", + "legendFormat": "{{version}}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Version", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 3, + "y": 1 + }, + "id": 10, + "interval": "$interval", + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(loki_internal_log_messages_total{job=~\"$job\",instance=~\"$instance\"})", + "instant": false, + "interval": "$interval", + 
"legendFormat": "", + "refId": "A" + } + ], + "title": "Internal Log Messages", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [ + { + "options": { + "null": { + "index": 0, + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 6, + "y": 1 + }, + "id": 80, + "interval": "$interval", + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "deprecated_flags_inuse_total{job=~\"$job\",instance=~\"$instance\"}", + "instant": false, + "interval": "$interval", + "legendFormat": "Deprecated", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "loki_experimental_features_in_use_total{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "instant": false, + "interval": "$interval", + "legendFormat": "Experimental", + "refId": "B" + } + ], + "title": "Features Currently Set", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [ + { + "options": { + "null": { + "index": 0, + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + 
"color": "semi-dark-red", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 9, + "y": 1 + }, + "id": 24, + "interval": "$interval", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "loki_panic_total{job=~\"$job\",instance=~\"$instance\"}", + "instant": false, + "interval": "$interval", + "legendFormat": "The total number of panics triggered", + "refId": "A" + } + ], + "title": "Panics", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [ + { + "options": { + "null": { + "index": 0, + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "semi-dark-red", + "value": 0 + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 12, + "y": 1 + }, + "id": 112, + "interval": "$interval", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": 
"loki_distributor_replication_factor{job=~\"$job\",instance=~\"$instance\"}", + "instant": false, + "interval": "$interval", + "legendFormat": "The configured replication factor", + "refId": "A" + } + ], + "title": "Replication Factor", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "links": [], + "mappings": [ + { + "options": { + "null": { + "index": 0, + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 15, + "y": 1 + }, + "id": 81, + "interval": "$interval", + "maxDataPoints": 1, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "/.*/", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "go_info{job=~\"$job\",instance=~\"$instance\"}", + "format": "table", + "instant": false, + "interval": "$interval", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Go Version", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "__name__": true, + "branch": true, + "instance": true, + "job": true, + "revision": true, + "version": false + }, + "indexByName": { + "Time": 0, + "Value": 8, + "__name__": 1, + "branch": 2, + "goversion": 4, + "instance": 5, + "job": 6, + "revision": 3, + "version": 7 + }, + "renameByName": { + "branch": "", + "goversion": "Go 
Version", + "instance": "", + "revision": "", + "version": "Go Version" + } + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "links": [], + "mappings": [ + { + "options": { + "null": { + "index": 0, + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "semi-dark-red", + "value": 500 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 18, + "y": 1 + }, + "id": 86, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(go_goroutines{job=~\"$job\",instance=~\"$instance\"})", + "format": "table", + "instant": false, + "interval": "$interval", + "legendFormat": "Number of goroutines that currently exist", + "range": true, + "refId": "A" + } + ], + "title": "Total Goroutines", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "links": [], + "mappings": [ + { + "options": { + "null": { + "index": 0, + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "semi-dark-red", + "value": 25 + } + ] + }, + "unit": "short" + }, + "overrides": [] + 
}, + "gridPos": { + "h": 3, + "w": 3, + "x": 21, + "y": 1 + }, + "id": 87, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(go_threads{job=~\"$job\",instance=~\"$instance\"})", + "format": "table", + "instant": false, + "interval": "$interval", + "legendFormat": "Number of OS threads created", + "range": true, + "refId": "A" + } + ], + "title": "Total Threads", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [ + { + "options": { + "null": { + "index": 0, + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 4 + }, + "id": 235, + "interval": "$interval", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_distributor_lines_received_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by(tenant)", + "hide": false, + "instant": false, + "interval": 
"$interval", + "legendFormat": "{{ tenant }}", + "refId": "A" + } + ], + "title": "Distributor Lines", + "transformations": [ + { + "id": "calculateField", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "fake": "Default Tenant" + } + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [ + { + "options": { + "null": { + "index": 0, + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 4 + }, + "id": 234, + "interval": "$interval", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_distributor_bytes_received_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by(tenant)", + "hide": false, + "instant": false, + "interval": "$interval", + "legendFormat": "{{ tenant }}", + "refId": "A" + } + ], + "title": "Distributor Volume", + "transformations": [ + { + "id": "calculateField", + "options": { + "mode": "reduceRow", + "reduce": { + "include": [], + "reducer": "sum" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "Received / fake": "Received / Default Tenant", + "fake": "Default Tenant" + } + } + } + ], + "type": "stat" + }, + { + "datasource": 
{ + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [ + { + "options": { + "null": { + "index": 0, + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 4 + }, + "id": 230, + "interval": "$interval", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_ingester_chunks_created_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "instant": false, + "interval": "$interval", + "legendFormat": "Create", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_ingester_chunks_stored_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (tenant) ", + "hide": false, + "instant": false, + "interval": "$interval", + "legendFormat": "Store / {{ tenant }}", + "refId": "B" + } + ], + "title": "Ingester Chunk Operations", + "transformations": [ + { + "id": "calculateField", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "Store / fake": "Store / Default Tenant" + } + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [ + 
{ + "options": { + "null": { + "index": 0, + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 4 + }, + "id": 188, + "interval": "$interval", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_ingester_chunk_stored_bytes_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (tenant)", + "instant": false, + "interval": "$interval", + "legendFormat": "{{ tenant }}", + "refId": "A" + } + ], + "title": "Ingester Chunks Volume", + "transformations": [ + { + "id": "calculateField", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "fake": "Default Tenant" + } + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [ + { + "options": { + "null": { + "index": 0, + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 0, + "y": 10 + }, + "id": 23, + "interval": "$interval", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + 
"reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(loki_store_series_total{job=~\"$job\",instance=~\"$instance\"}) by(status)", + "instant": false, + "interval": "$interval", + "legendFormat": "{{ status }}", + "refId": "A" + } + ], + "title": "Store Series", + "transformations": [ + { + "id": "calculateField", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "discarded": "Discarded", + "matched": "Matched" + } + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [ + { + "options": { + "null": { + "index": 0, + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 3, + "y": 10 + }, + "id": 131, + "interval": "$interval", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(loki_store_chunks_downloaded_total{job=~\"$job\",instance=~\"$instance\"}) by(status)", + "instant": false, + "interval": "$interval", + "legendFormat": "{{ status }}", + "refId": "A" + } + ], + 
"title": "Store Chunks Downloaded", + "transformations": [ + { + "id": "calculateField", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "discarded": "Discarded", + "matched": "Matched" + } + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [ + { + "options": { + "null": { + "index": 0, + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 6, + "y": 10 + }, + "id": 137, + "interval": "$interval", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(loki_index_chunk_refs_total{job=~\"$job\",instance=~\"$instance\"}) by(status)", + "instant": false, + "interval": "$interval", + "legendFormat": "{{ status }}", + "refId": "A" + } + ], + "title": "Index Chunks Downloaded", + "transformations": [ + { + "id": "calculateField", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "discarded": "Discarded", + "matched": "Matched" + } + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [ + { + "options": { + "null": { + "index": 0, + "text": "N/A" 
+ } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 12, + "y": 10 + }, + "id": 191, + "interval": "$interval", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "loki_ingester_memory_chunks{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "instant": false, + "interval": "$interval", + "legendFormat": "Chunks in memory", + "refId": "B" + } + ], + "title": "Ingester Chunks In Memory", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [ + { + "options": { + "null": { + "index": 0, + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 15, + "y": 10 + }, + "id": 192, + "interval": "$interval", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": 
"code", + "expr": "sum(loki_ingester_memory_streams{job=~\"$job\",instance=~\"$instance\"}) by(tenant)", + "instant": false, + "interval": "$interval", + "legendFormat": "{{ tenant }}", + "refId": "A" + } + ], + "title": "Ingester Streams in Memory", + "transformations": [ + { + "id": "calculateField", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Time": 0, + "Total": 2, + "fake": 1 + }, + "renameByName": { + "discarded": "Discarded", + "fake": "Default Tenant", + "matched": "Matched" + } + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [ + { + "options": { + "null": { + "index": 0, + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 10 + }, + "id": 233, + "interval": "$interval", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_ingester_streams_created_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (tenant)", + "hide": false, + "instant": false, + "interval": "$interval", + "legendFormat": "Create / {{ tenant }}", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": 
"sum(rate(loki_ingester_streams_removed_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (tenant)", + "hide": false, + "instant": false, + "interval": "$interval", + "legendFormat": "Delete / {{ tenant }}", + "refId": "B" + } + ], + "title": "Ingester Stream Operations", + "transformations": [ + { + "id": "calculateField", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "Create / fake": "Create / Default Tenant", + "Delete / fake": "Delete / Default Tenant", + "Store / fake": "Store / Default Tenant" + } + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [ + { + "options": { + "null": { + "index": 0, + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 0, + "y": 13 + }, + "id": 236, + "interval": "$interval", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_chunk_store_fetched_chunks_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (user)", + "hide": false, + "instant": false, + "interval": "$interval", + "legendFormat": "{{ user}}", + "refId": "A" + } + ], + "title": "Store Operations Fetched", + "transformations": [ + { + "id": "calculateField", + "options": {} + }, + { + "id": 
"organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "Create / fake": "Create / Default Tenant", + "Delete / fake": "Delete / Default Tenant", + "Fetch / fake": "Fetch / Default Tenant", + "Store / fake": "Store / Default Tenant", + "fake": "Default Tenant" + } + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [ + { + "options": { + "null": { + "index": 0, + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 6, + "y": 13 + }, + "id": 237, + "interval": "$interval", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_chunk_store_stored_chunks_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (user)", + "hide": false, + "instant": false, + "interval": "$interval", + "legendFormat": "{{ user}}", + "refId": "A" + } + ], + "title": "Store Operations Stored", + "transformations": [ + { + "id": "calculateField", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "Create / fake": "Create / Default Tenant", + "Delete / fake": "Delete / Default Tenant", + "Fetch / fake": "Default Tenant", + "Store / fake": "Store / Default Tenant", + "fake": "Default Tenant" + } + } + } + ], + "type": 
"stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [ + { + "options": { + "null": { + "index": 0, + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 12, + "y": 13 + }, + "id": 231, + "interval": "$interval", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_ingester_chunk_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le))", + "hide": false, + "instant": false, + "interval": "$interval", + "legendFormat": "Chunk Size", + "refId": "A" + } + ], + "title": "Ingester Chunk Size", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [ + { + "options": { + "null": { + "index": 0, + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 15, + "y": 13 + }, + "id": 232, + "interval": "$interval", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + 
"calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_ingester_chunk_compression_ratio_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le))", + "hide": false, + "instant": false, + "interval": "$interval", + "legendFormat": "Compression Ratio", + "refId": "A" + } + ], + "title": "Ingester Chunk Compression", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [ + { + "options": { + "null": { + "index": 0, + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 0, + "y": 16 + }, + "id": 238, + "interval": "$interval", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_chunk_store_fetched_chunk_bytes_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "instant": false, + "interval": "$interval", + "legendFormat": "{{ user}}", + "refId": "A" + } + ], + "title": "Store Volume Fetched", + "transformations": [ + { + "id": "calculateField", + "options": {} + }, + { + "id": "organize", + 
"options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "Create / fake": "Create / Default Tenant", + "Delete / fake": "Delete / Default Tenant", + "Fetch / fake": "Fetch / Default Tenant", + "Store / fake": "Store / Default Tenant", + "fake": "Default Tenant" + } + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [ + { + "options": { + "null": { + "index": 0, + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 6, + "y": 16 + }, + "id": 239, + "interval": "$interval", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_chunk_store_stored_chunk_bytes_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "instant": false, + "interval": "$interval", + "legendFormat": "{{ user}}", + "refId": "A" + } + ], + "title": "Store Volume Stored", + "transformations": [ + { + "id": "calculateField", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "Create / fake": "Create / Default Tenant", + "Delete / fake": "Delete / Default Tenant", + "Fetch / fake": "Fetch / Default Tenant", + "Store / fake": "Store / Default Tenant", + "fake": "Default Tenant" + } + } + } + ], + "type": "stat" + }, + { + 
"collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 143, + "panels": [], + "title": "Loki API / Connections Metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Time", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 20 + }, + "id": 151, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_request_duration_seconds_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le,method,route,status_code))", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ method }} / {{ route }} / {{ status_code }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki API - Request Duration", + "type": 
"timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Requests", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 20 + }, + "id": 128, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_inflight_requests{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (method, route)", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ method }} / {{ route }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki API - Request Current", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Size", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 153, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_response_message_bytes_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le,method,route))", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ method }} / {{ route }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki API - Response Size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Size", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", 
+ "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 28 + }, + "id": 152, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_request_message_bytes_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le,method,route))", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ method }} / {{ route }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki API - Request Size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Connections", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 
1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 36 + }, + "id": 96, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "loki_tcp_connections{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ protocol }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki - TCP Connections", + "transformations": [ + { + "id": "calculateField", + "options": { + "mode": "reduceRow", + "reduce": { + "include": [ + "grpc", + "http" + ], + "reducer": "sum" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Time": 1, + "Total": 0, + "grpc": 2, + "http": 3 + }, + "renameByName": { + "grpc": "gRPC", + "http": "HTTP" + } + } + } + ], + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 44 + }, + "id": 114, + "panels": [], + "title": "Loki Cache Metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Cache Hits", + "axisPlacement": "auto", + 
"barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 45 + }, + "id": 115, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_cache_hits{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by(name)", + "hide": false, + "interval": "$interval", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Loki Cache - Hits", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Keys", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 45 + }, + "id": 116, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_cache_fetched_keys{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by(name)", + "hide": false, + "interval": "$interval", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Loki Cache - Fetched Keys", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Time", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { 
+ "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 53 + }, + "id": 118, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_cache_request_duration_seconds_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le,name,method))", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ name }} / {{ method }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Cache - Request Duration", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*) / (.*)\\.(.*)", + "renamePattern": "$1 / $3" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Size", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + 
"unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 53 + }, + "id": 119, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_cache_value_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le,name,method))", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ name }} / {{ method }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Cache - Value Size", + "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 61 + }, + "id": 108, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 4 + }, + "id": 109, + "options": { + "legend": { + "calcs": [ + "min", + "max", + 
"mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_distributor_bytes_received_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by(tenant)", + "hide": false, + "interval": "$interval", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Loki Distributor - Received Bytes", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "fake": "Default Tenant" + } + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Lines", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 4 + }, + "id": 110, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { 
+ "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_distributor_lines_received_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by(tenant)", + "hide": false, + "interval": "$interval", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Loki Distributor - Received Lines", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "fake": "Default Tenant" + } + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Appends", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 12 + }, + "id": 111, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": 
"sum(rate(loki_distributor_ingester_appends_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by(ingester)", + "hide": false, + "interval": "$interval", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Loki Distributor - Batch Appents Sent To Ingester", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "fake": "Default Tenant" + } + } + } + ], + "type": "timeseries" + } + ], + "title": "Loki Distributor Metrics", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 62 + }, + "id": 127, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Time", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 5 + }, + "id": 130, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": 
"histogram_quantile(0.95, sum(rate(loki_index_request_duration_seconds_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le,component,operation,status_code))", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ component }} / {{ operation }} / {{ status_code }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Index - Request Duration", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*) / (.*)\\.(.*)", + "renamePattern": "$1 / $3" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Chunks", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 5 + }, + "id": 138, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_index_chunk_refs_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (status)", + "hide": 
false, + "interval": "$interval", + "legendFormat": "{{ status }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Index - Chunks Downloaded", + "transformations": [ + { + "id": "calculateField", + "options": { + "mode": "reduceRow", + "reduce": { + "include": [ + "debug", + "error", + "info", + "warn" + ], + "reducer": "sum" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Time": 1, + "Total": 0, + "grpc": 2, + "http": 3 + }, + "renameByName": { + "debug": "Debug", + "discarded": "Discarded", + "error": "Error", + "grpc": "gRPC", + "http": "HTTP", + "info": "Info", + "matched": "Matched", + "warn": "Warn" + } + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Time", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 13 + }, + "id": 201, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" 
+ }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_kv_request_duration_seconds_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le,kv_name,operation,role,status_code,type))", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ kv_name }} / {{ type }} / {{ operation }} / {{ status_code }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki KV - Request Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Time", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 13 + }, + "id": 202, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(cortex_kv_request_duration_seconds_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le,kv_name,operation,role,status_code,type))", + "hide": false, + 
"interval": "$interval", + "legendFormat": "{{ kv_name }} / {{ type }} / {{ operation }} / {{ status_code }}", + "range": true, + "refId": "A" + } + ], + "title": "Cortex KV - Request Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Time", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 21 + }, + "id": 228, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(cortex_consul_request_duration_seconds_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le,kv_name,operation,status_code))", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ kv_name }} / {{ operation }} / {{ status_code }}", + "range": true, + "refId": "A" + } + ], + "title": "Cortex KV - Request Duration", + "type": "timeseries" + } + ], + "title": "Loki Index Metrics", + 
"type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 63 + }, + "id": 174, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Time", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 6 + }, + "id": 189, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_ingester_client_request_duration_seconds_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le,operation,status_code))", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ operation }} / {{ status_code }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Ingester - Client Request Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { 
+ "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Operations", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 6 + }, + "id": 185, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_ingester_chunks_created_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Chunk creations ", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_ingester_chunks_flushed_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (reason)", + "hide": false, + "interval": "$interval", + "legendFormat": "Chunk flushes / {{ reason }}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": 
"sum(rate(loki_ingester_chunks_stored_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (tenant) ", + "hide": false, + "interval": "$interval", + "legendFormat": "Chunks stored / {{ tenant }}", + "range": true, + "refId": "C" + } + ], + "title": "Loki Ingester - Chunk Operations", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*)fake$", + "renamePattern": "$1Default Tenant" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Checkpoints", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 14 + }, + "id": 175, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_ingester_checkpoint_creations_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Checkpoint creations ", + 
"range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_ingester_checkpoint_creations_failed_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Checkpoint creation failures", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_ingester_checkpoint_deletions_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Checkpoint deletions", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_ingester_checkpoint_deletions_failed_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Checkpoint deletion failures", + "range": true, + "refId": "D" + } + ], + "title": "Loki Ingester - Checkpoints", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*)fake$", + "renamePattern": "$1Default Tenant" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": 
"A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 14 + }, + "id": 177, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_ingester_checkpoint_logged_bytes_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Bytes written to disk for checkpointing", + "range": true, + "refId": "A" + } + ], + "title": "Loki Ingester - Checkpoints Volume", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*)fake$", + "renamePattern": "$1Default Tenant" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Time", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + 
}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 22 + }, + "id": 183, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "loki_ingester_checkpoint_duration_seconds{job=~\"$job\",instance=~\"$instance\",quantile=\"0.99\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Time taken to create a checkpoint (0.99 quantile)", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "loki_ingester_checkpoint_duration_seconds{job=~\"$job\",instance=~\"$instance\",quantile=\"0.9\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Time taken to create a checkpoint (0.9 quantile)", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "loki_ingester_checkpoint_duration_seconds{job=~\"$job\",instance=~\"$instance\",quantile=\"0.5\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Time taken to create a checkpoint (0.5 quantile)", + "range": true, + "refId": "C" + } + ], + "title": "Loki Ingester - Checkpoint Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Blocks", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + 
"viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 22 + }, + "id": 176, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_ingester_blocks_per_chunk_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le))", + "hide": false, + "interval": "$interval", + "legendFormat": "Blocks per Chunk", + "range": true, + "refId": "A" + } + ], + "title": "Loki Ingester - Blocks Per Chunk", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "%", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": 
"absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 30 + }, + "id": 180, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_ingester_chunk_compression_ratio_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le))", + "hide": false, + "interval": "$interval", + "legendFormat": "Compression ratio of chunks (when stored)", + "range": true, + "refId": "A" + } + ], + "title": "Loki Ingester - Chunk Compression Ratio", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Count", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 30 + }, + "id": 190, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", 
+ "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "loki_ingester_memory_chunks{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Chunks in memory", + "range": true, + "refId": "A" + } + ], + "title": "Loki Ingester - Chunks In Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 38 + }, + "id": 184, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_ingester_chunk_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le))", + "hide": 
false, + "interval": "$interval", + "legendFormat": "Distribution of stored chunk sizes (when stored)", + "range": true, + "refId": "A" + } + ], + "title": "Loki Ingester - Chunk Size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Time", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 38 + }, + "id": 178, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_ingester_chunk_age_seconds_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le))", + "hide": false, + "interval": "$interval", + "legendFormat": "Distribution of chunk ages (when stored)", + "range": true, + "refId": "A" + } + ], + "title": "Loki Ingester - Chunk Age", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": 
"", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "%", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 62 + }, + "id": 186, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_ingester_chunk_utilization_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le))", + "hide": false, + "interval": "$interval", + "legendFormat": "Distribution of stored chunk utilization (when stored)", + "range": true, + "refId": "A" + } + ], + "title": "Loki Ingester - Chunk Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", 
+ "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 62 + }, + "id": 187, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_ingester_chunk_stored_bytes_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (tenant)", + "hide": false, + "interval": "$interval", + "legendFormat": "Bytes stored in chunks / {{ tenant }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Ingester - Chunk Stored Volume", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*)fake$", + "renamePattern": "$1Default Tenant" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Samples", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": 
"linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 70 + }, + "id": 194, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_ingester_samples_per_chunk_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le))", + "hide": false, + "interval": "$interval", + "legendFormat": "Samples per chunk", + "range": true, + "refId": "A" + } + ], + "title": "Loki Ingester - Samples Per Chunk", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Time", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "h" + }, + "overrides": [] + }, + "gridPos": { + "h": 
8, + "w": 12, + "x": 12, + "y": 70 + }, + "id": 179, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_ingester_chunk_bounds_hours_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le))", + "hide": false, + "interval": "$interval", + "legendFormat": "Distribution of chunk end-start durations", + "range": true, + "refId": "A" + } + ], + "title": "Loki Ingester - Chunk Bounds (Hours)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Time", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 78 + }, + "id": 181, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_ingester_chunk_encode_time_seconds_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le))", + "hide": false, + "interval": "$interval", + "legendFormat": "Distribution of chunk encode times", + "range": true, + "refId": "A" + } + ], + "title": "Loki Ingester - Chunk Encoding Time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Lines", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 78 + }, + "id": 182, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_ingester_chunk_entries_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le))", + "hide": false, + "interval": "$interval", + 
"legendFormat": "Distribution of stored lines per chunk (when stored)", + "range": true, + "refId": "A" + } + ], + "title": "Loki Ingester - Lines Per Chunk", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Operations", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 86 + }, + "id": 196, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_ingester_streams_created_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (tenant)", + "hide": false, + "interval": "$interval", + "legendFormat": "Stream creations / {{ tenant }}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_ingester_streams_removed_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by 
(tenant)", + "hide": false, + "interval": "$interval", + "legendFormat": "Stream deletions / {{ tenant }}", + "range": true, + "refId": "B" + } + ], + "title": "Loki Ingester - Stream Operations", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*)fake$", + "renamePattern": "$1Default Tenant" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Streams", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 86 + }, + "id": 193, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(loki_ingester_memory_streams{job=~\"$job\",instance=~\"$instance\"}) by (tenant)", + "hide": false, + "interval": "$interval", + "legendFormat": "Streams in memory / {{ tenant }}", + "range": true, + "refId": "B" + } + ], + "title": "Loki Ingester - Streams In Memory", + "transformations": [ + { 
+ "id": "renameByRegex", + "options": { + "regex": "(.*)fake$", + "renamePattern": "$1Default Tenant" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Records", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 94 + }, + "id": 197, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_ingester_wal_records_logged_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "WAL records logged", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_ingester_wal_duplicate_entries_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Discarded 
entries during WAL replay due to existing in checkpoints", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_ingester_wal_disk_full_failures_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "WAL write failures due to full disk", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_ingester_wal_discarded_samples_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (reason)", + "hide": false, + "interval": "$interval", + "legendFormat": "WAL segment entries discarded during replay / {{ reason }}", + "range": true, + "refId": "D" + } + ], + "title": "Loki Ingester - WAL Records", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*)fake$", + "renamePattern": "$1Default Tenant" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + 
"h": 8, + "w": 12, + "x": 12, + "y": 94 + }, + "id": 200, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "loki_ingester_wal_bytes_in_use{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "legendFormat": "Currently in use by the WAL recovery process", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_ingester_wal_logged_bytes_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Written to disk for WAL records", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_ingester_wal_discarded_bytes_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (reason)", + "hide": false, + "interval": "$interval", + "legendFormat": "WAL segment bytes discarded during replay / {{ reason }}", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_ingester_wal_discarded_bytes_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (reason)", + "hide": false, + "interval": "$interval", + "legendFormat": "Recovered from the WAL", + "range": true, + "refId": "D" + } + ], + "title": "Loki Ingester - WAL Volume", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*)fake$", + "renamePattern": "$1Default Tenant" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", 
+ "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Operations", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 102 + }, + "id": 198, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_ingester_wal_recovered_entries_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Entries recovered from the WAL", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_ingester_wal_recovered_chunks_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Chunks recovered from the WAL checkpoints", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + 
"editorMode": "code", + "expr": "rate(loki_ingester_wal_recovered_streams_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Streams recovered from the WAL", + "range": true, + "refId": "C" + } + ], + "title": "Loki Ingester - WAL Recovery Operations", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*)fake$", + "renamePattern": "$1Default Tenant" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Count", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 102 + }, + "id": 199, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "loki_ingester_wal_replay_active{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "WAL is replaying", + "range": 
true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "loki_ingester_wal_replay_flushing{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "WAL replay in flushing phase due to backpressure", + "range": true, + "refId": "B" + } + ], + "title": "Loki Ingester - WAL Replay Status", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*)fake$", + "renamePattern": "$1Default Tenant" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Length", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 110 + }, + "id": 218, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "cortex_ingester_flush_queue_length{job=~\"$job\",instance=~\"$instance\"}", 
+ "hide": false, + "interval": "$interval", + "legendFormat": "Series pending in the flush queue", + "range": true, + "refId": "A" + } + ], + "title": "Cortex Ingester - Flush Queue", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Clients", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 110 + }, + "id": 229, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "cortex_distributor_ingester_clients{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Ingester clients", + "range": true, + "refId": "A" + } + ], + "title": "Cortex Ingester - Clients", + "type": "timeseries" + } + ], + "title": "Loki Ingester Metrics", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 64 + }, + "id": 121, + "panels": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Messages", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 124, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_internal_log_messages_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (level)", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ protocol }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki - Internal Log Messages", + "transformations": [ + { + "id": "calculateField", + "options": { + "mode": "reduceRow", + "reduce": { + "include": [ + "debug", + "error", + "info", + "warn" + ], + "reducer": "sum" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Time": 1, + "Total": 0, + "grpc": 2, + "http": 3 + }, + "renameByName": { + "debug": "Debug", + "error": "Error", + 
"grpc": "gRPC", + "http": "HTTP", + "info": "Info", + "warn": "Warn" + } + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Flushes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 217, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_log_flushes_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le))", + "hide": false, + "interval": "$interval", + "legendFormat": "Log flushes - line-buffered logger", + "range": true, + "refId": "A" + } + ], + "title": "Loki Line - Log Flushes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": 
false + }, + "scaleDistribution": { + "type": "linear" + } + }, + "links": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 122, + "maxDataPoints": 50, + "options": { + "calculate": false, + "cellGap": 2, + "cellValues": { + "unit": "bytes" + }, + "color": { + "exponent": 0.5, + "fill": "red", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "BrBG", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisLabel": "Bucket Size", + "axisPlacement": "left", + "reverse": false, + "unit": "bytes" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(loki_bytes_per_line_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])", + "format": "heatmap", + "instant": false, + "interval": "", + "legendFormat": "{{ le }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Line - Line Size Distribution", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "+Inf": "Unlimited", + "0.00010239999999999998": "10e-5", + "0.0010485759999999998": "10e-4", + "0.010485759999999998": "10e-3", + "0.10485759999999998": "10e-2", + "1.0239999999999999e-05": "10e-6", + "1.0239999999999999e-06": "10e-7", + "9.999999999999998e-08": "10e-8", + "9.999999999999999e-09": "10e-9", + "9.999999999999999e-10": "10e-10" + } + } + } + ], + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": 
false, + "axisColorMode": "text", + "axisLabel": "Size", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 123, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_bytes_per_line_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le))", + "hide": false, + "interval": "$interval", + "legendFormat": "Number of bytes per line", + "range": true, + "refId": "A" + } + ], + "title": "Loki Line - Line Size", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*) / (.*)\\.(.*)", + "renamePattern": "$1 / $3" + } + } + ], + "type": "timeseries" + } + ], + "title": "Loki Line Metrics", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 65 + }, + "id": 140, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", 
+ "axisLabel": "Lines", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 148, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_logql_querystats_ingester_sent_lines_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Lines", + "range": true, + "refId": "A" + } + ], + "title": "Loki LogQL - Lines Received From Ingester", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Duplicates", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + 
"spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 150, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_logql_querystats_duplicates_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Lines", + "range": true, + "refId": "A" + } + ], + "title": "Loki LogQL - Duplicates Found", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Time", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 125, + "options": { + "legend": { + "calcs": [ + 
"min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(logql_query_duration_seconds_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le,query_type))", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ query_type }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki LogQL - Query Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Zime", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 147, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": 
"histogram_quantile(0.95, sum(rate(loki_logql_querystats_latency_seconds_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le,type,status_code))", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ type }} / {{ status_code }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki LogQL - Query Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Size", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 141, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_logql_querystats_bytes_processed_per_seconds_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le,type,status_code))", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ type }} / {{ status_code }}", + "range": true, + 
"refId": "A" + } + ], + "title": "Loki LogQL - Query Volume Processed", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Chunks", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 149, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_logql_querystats_downloaded_chunk_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by(type,status_code)", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ type }} / {{ status_code }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki LogQL - Downloaded Chunks", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": 
false, + "axisColorMode": "text", + "axisLabel": "Zime", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 146, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_logql_querystats_chunk_download_latency_seconds_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le,type,status_code))", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ type }} / {{ status_code }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki LogQL - Chunk Download Latency", + "type": "timeseries" + } + ], + "title": "Loki LogQL Metrics", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 66 + }, + "id": 204, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Time", + "axisPlacement": "auto", + "barAlignment": 0, + 
"drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 206, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_querier_query_frontend_request_duration_seconds_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le,operation,status_code))", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ operation }} / {{ status_code }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Querier - Frontend Request Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Count", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + 
"showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 210, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_query_frontend_log_result_cache_hit_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Cache hits for the frontend cache", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_query_frontend_log_result_cache_miss_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Cache misses for the frontend cache", + "range": true, + "refId": "B" + } + ], + "title": "Loki Querier - Frontend Cache", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Time", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + 
"type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 17 + }, + "id": 211, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_query_frontend_partitions_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le))", + "hide": false, + "interval": "$interval", + "legendFormat": "Time-based partitions (sub-requests) per request", + "range": true, + "refId": "A" + } + ], + "title": "Loki Querier - Frontend Partitions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Count", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 17 + }, + "id": 212, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_query_frontend_shard_factor_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le,mapper))", + "hide": false, + "interval": "$interval", + "legendFormat": "Downstream queries per request / {{ mapper }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Querier - Frontend Shard Factor", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Count", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 213, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", 
+ "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_query_frontend_sharding_parsed_queries_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le,mapper,type)", + "hide": false, + "interval": "$interval", + "legendFormat": "Parsed queries by evaluation type / {{ mapper }} / {{ type }}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_query_frontend_shards_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le,mapper,type))", + "hide": false, + "interval": "$interval", + "legendFormat": "Downstream queries by expression type / {{ mapper }} / {{ type }}", + "range": true, + "refId": "B" + } + ], + "title": "Loki Querier - Frontend Shards", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Count", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 25 + }, + "id": 209, + "options": { + "legend": { + "calcs": [ + 
"min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "loki_querier_query_frontend_clients{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Clients connected to query-frontend", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "loki_querier_tail_active{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Active tailers", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "loki_querier_tail_active_streams{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Active streams being tailed", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "loki_querier_worker_concurrency{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Concurrent querier workers", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "loki_querier_worker_inflight_queries{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Queries being processed by the querier workers", + "range": true, + "refId": "E" + } + ], + "title": "Loki Querier - Index Cache Clients / Workers", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Operations", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 33 + }, + "id": 208, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_querier_index_cache_gets_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Gets for the index cache", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_querier_index_cache_puts_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Puts for the index cache", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": 
"rate(loki_querier_index_cache_hits_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Cache hits for the index cache", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_querier_index_cache_corruptions_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Cache corruptions for the index cache", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_querier_index_cache_encode_errors_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Errors for the index cache while encoding the body", + "range": true, + "refId": "E" + } + ], + "title": "Loki Querier - Index Cache Operations", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Connections", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + 
"w": 12, + "x": 12, + "y": 33 + }, + "id": 219, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "cortex_query_frontend_connected_schedulers{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Schedulers this frontend is connected to", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "cortex_query_scheduler_connected_frontend_clients{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Query-frontend worker clients", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "cortex_query_scheduler_connected_querier_clients{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Querier worker clients", + "range": true, + "refId": "C" + } + ], + "title": "Cortex Query - Frontend Connections", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Operations", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + 
"showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 41 + }, + "id": 215, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(querier_cache_added_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (cache)", + "hide": false, + "interval": "$interval", + "legendFormat": "Entries added to the cache / {{ cache }}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(querier_cache_added_new_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (cache)", + "hide": false, + "interval": "$interval", + "legendFormat": "New entries added to the cache / {{ cache }}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(querier_cache_evicted_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (cache,reason)", + "hide": false, + "interval": "$interval", + "legendFormat": "Entries evicted from the cache / {{ cache }} / {{ reason }}", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(querier_cache_gets_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (cache)", + "hide": 
false, + "interval": "$interval", + "legendFormat": "Get calls to the cache / {{ cache }}", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(querier_cache_misses_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (cache)", + "hide": false, + "interval": "$interval", + "legendFormat": "Missed calls to the cache / {{ cache }}", + "range": true, + "refId": "E" + } + ], + "title": "Loki Query Cache - Throughput", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Count", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 41 + }, + "id": 214, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(querier_cache_entries{job=~\"$job\",instance=~\"$instance\"}) by (cache)", + "hide": false, 
+ "interval": "$interval", + "legendFormat": "Cache Entries / {{ cache }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Query Cache - Entries", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 65 + }, + "id": 216, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(querier_cache_memory_bytes{job=~\"$job\",instance=~\"$instance\"}) by (cache)", + "hide": false, + "legendFormat": "Current cache size / {{ cache }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Query Cache - Size", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*)fake$", + "renamePattern": "$1Default Tenant" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" 
+ }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Count", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 65 + }, + "id": 207, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(cortex_query_frontend_retries_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le))", + "hide": false, + "interval": "$interval", + "legendFormat": "Request retries", + "range": true, + "refId": "A" + } + ], + "title": "Cortex Query - Frontend Retries", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Time", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + 
"legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 73 + }, + "id": 205, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(cortex_frontend_query_range_duration_seconds_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le,method,status_code))", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ method }} / {{ status_code }}", + "range": true, + "refId": "A" + } + ], + "title": "Cortex Query - Frontend Range Request Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Connections", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": 
"none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 73 + }, + "id": 220, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "cortex_query_frontend_queries_in_progress{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Queries in progress", + "range": true, + "refId": "A" + } + ], + "title": "Cortex Query - Frontend Operations", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Requests", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 81 + }, + "id": 221, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + 
"displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "cortex_query_scheduler_inflight_requests{job=~\"$job\",instance=~\"$instance\",quantile=\"0.99\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Number of inflight requests (0.99 quantile)", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "cortex_query_scheduler_inflight_requests{job=~\"$job\",instance=~\"$instance\",quantile=\"0.95\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Number of inflight requests (0.95 quantile)", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "cortex_query_scheduler_inflight_requests{job=~\"$job\",instance=~\"$instance\",quantile=\"0.9\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Number of inflight requests (0.9 quantile)", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "cortex_query_scheduler_inflight_requests{job=~\"$job\",instance=~\"$instance\",quantile=\"0.8\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Number of inflight requests (0.8 quantile)", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "cortex_query_scheduler_inflight_requests{job=~\"$job\",instance=~\"$instance\",quantile=\"0.75\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Number of inflight requests (0.75 quantile)", + "range": true, + "refId": "E" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "cortex_query_scheduler_inflight_requests{job=~\"$job\",instance=~\"$instance\",quantile=\"0.5\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Number of inflight requests (0.5 quantile)", + "range": true, + "refId": "F" + } + ], + "title": "Cortex Query - Scheduler Inflight Requests", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Time", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 81 + }, + "id": 222, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le))", + "hide": false, + "interval": "$interval", + "legendFormat": "Time 
spent by requests in queue before getting picked up by a querier", + "range": true, + "refId": "A" + } + ], + "title": "Cortex Query - Scheduler Queue Duration", + "type": "timeseries" + } + ], + "title": "Loki Querier", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 67 + }, + "id": 224, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Heartbeats", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 225, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(ring_member_heartbeats_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (name)", + "hide": false, + "interval": "$interval", + "legendFormat": "Heartbeats sent / {{ name }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Ring - Heartbeats", + "type": "timeseries" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Tokens", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 226, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(ring_member_tokens_owned{job=~\"$job\",instance=~\"$instance\"}) by (name)", + "hide": false, + "interval": "$interval", + "legendFormat": "Tokens owned / {{ name }}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(ring_member_tokens_to_own{job=~\"$job\",instance=~\"$instance\"}) by (name)", + "hide": false, + "interval": "$interval", + "legendFormat": "Tokens to be owned / {{ name }}", + "range": true, + "refId": "B" + } + ], + "title": "Loki Ring - Tokens", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Heartbeats", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 227, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(cortex_member_consul_heartbeats_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (name)", + "hide": false, + "interval": "$interval", + "legendFormat": "Heartbeats sent / {{ name }}", + "range": true, + "refId": "A" + } + ], + "title": "Cortex Ring Member - Heartbeats", + "type": "timeseries" + } + ], + "title": "Loki Ring Metrics", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 68 + }, + "id": 133, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": 
false, + "axisColorMode": "text", + "axisLabel": "Chunks", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 135, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_store_series_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (status)", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ status }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Store - Series Referenced", + "transformations": [ + { + "id": "calculateField", + "options": { + "mode": "reduceRow", + "reduce": { + "include": [ + "debug", + "error", + "info", + "warn" + ], + "reducer": "sum" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Time": 1, + "Total": 0, + "grpc": 2, + "http": 3 + }, + "renameByName": { + "debug": "Debug", + "discarded": "Discarded", + "error": "Error", + "grpc": "gRPC", + "http": "HTTP", + "info": "Info", + "matched": "Matched", + "warn": "Warn" + } + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Chunks", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 134, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_store_chunks_downloaded_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (status)", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ status }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Store - Chunks Downloaded", + "transformations": [ + { + "id": "calculateField", + "options": { + "mode": "reduceRow", + "reduce": { + "include": [ + "debug", + "error", + "info", + "warn" + ], + "reducer": "sum" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Time": 1, + "Total": 0, + "grpc": 2, + "http": 3 + }, + "renameByName": { + "debug": "Debug", + "discarded": "Discarded", + "error": "Error", + "grpc": "gRPC", + 
"http": "HTTP", + "info": "Info", + "matched": "Matched", + "warn": "Warn" + } + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Batch Size", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 19 + }, + "id": 136, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_store_chunks_per_batch_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le,status))", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ status }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Store - Chunk Batch Size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Count", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 19 + }, + "id": 165, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_chunk_fetcher_cache_dequeued_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Dequeue from buffer", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_chunk_fetcher_cache_enqueued_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Enqueue to buffer", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_chunk_fetcher_cache_skipped_buffer_full_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": 
"Skipped due to buffer full", + "range": true, + "refId": "C" + } + ], + "title": "Loki Store - Chunk Fetcher Cache", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Chunks", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 27 + }, + "id": 166, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_chunk_store_fetched_chunks_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (user)", + "hide": false, + "interval": "$interval", + "legendFormat": "Fetched / {{ user }}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_chunk_store_stored_chunks_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (user)", + "hide": false, + "interval": "$interval", + 
"legendFormat": "Stored / {{ user }}", + "range": true, + "refId": "B" + } + ], + "title": "Loki Store - Fetched / Stored Chunks", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*)fake$", + "renamePattern": "$1Default Tenant" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Size", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 27 + }, + "id": 167, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(loki_chunk_store_fetched_chunk_bytes_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Fetched / {{ user }}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": 
"rate(loki_chunk_store_stored_chunk_bytes_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Stored / {{ user }}", + "range": true, + "refId": "B" + } + ], + "title": "Loki Store - Fetched / Stored Chunk Volume", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*)fake$", + "renamePattern": "$1Default Tenant" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + }, + "links": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 35 + }, + "id": 163, + "maxDataPoints": 50, + "options": { + "calculate": false, + "cellGap": 2, + "cellValues": { + "unit": "cps" + }, + "color": { + "exponent": 0.5, + "fill": "red", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "RdBu", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 0 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisLabel": "Bucket Entries", + "axisPlacement": "left", + "reverse": false, + "unit": "none" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(loki_chunk_store_chunks_per_query_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])", + "format": "heatmap", + "instant": false, + "interval": "", + "legendFormat": "{{ le }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Store - Chunks Per Query Distribution", + "transformations": [ + { + "id": 
"organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "+Inf": "Unlimited", + "0.00010239999999999998": "10e-5", + "0.0010485759999999998": "10e-4", + "0.010485759999999998": "10e-3", + "0.10485759999999998": "10e-2", + "1.0239999999999999e-05": "10e-6", + "1.0239999999999999e-06": "10e-7", + "13568.999999999998": "13569", + "144.99999999999997": "145", + "1536.9999999999998": "1537", + "24.999999999999996": "25", + "27264.999999999996": "27265", + "320.99999999999994": "321", + "3200.9999999999995": "3201", + "64.99999999999999": "65", + "6528.999999999999": "6529", + "704.9999999999999": "705", + "8.999999999999998": "9", + "9.999999999999998e-08": "10e-8", + "9.999999999999999e-09": "10e-9", + "9.999999999999999e-10": "10e-10" + } + } + } + ], + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Bucket Entries", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 35 + }, + "id": 156, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + 
"sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_chunk_store_chunks_per_query_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le))", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ le }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Store - Chunks Per Query", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + }, + "links": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 43 + }, + "id": 154, + "maxDataPoints": 50, + "options": { + "calculate": false, + "cellGap": 2, + "cellValues": { + "unit": "cps" + }, + "color": { + "exponent": 0.5, + "fill": "red", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "RdBu", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 0 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisLabel": "Bucket Entries", + "axisPlacement": "left", + "reverse": false, + "unit": "none" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(loki_chunk_store_index_entries_per_chunk_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])", + "format": "heatmap", + "instant": false, + "interval": "", + "legendFormat": "{{ le }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Store - Index Entries Per Chunk 
Distribution", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "+Inf": "Unlimited", + "0.00010239999999999998": "10e-5", + "0.0010485759999999998": "10e-4", + "0.010485759999999998": "10e-3", + "0.10485759999999998": "10e-2", + "1.0239999999999999e-05": "10e-6", + "1.0239999999999999e-06": "10e-7", + "13568.999999999998": "13569", + "144.99999999999997": "145", + "1536.9999999999998": "1537", + "24.999999999999996": "25", + "27264.999999999996": "27265", + "320.99999999999994": "321", + "3200.9999999999995": "3201", + "64.99999999999999": "65", + "6528.999999999999": "6529", + "704.9999999999999": "705", + "8.999999999999998": "9", + "9.999999999999998e-08": "10e-8", + "9.999999999999999e-09": "10e-9", + "9.999999999999999e-10": "10e-10" + } + } + } + ], + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Bucket Entries", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 43 + }, + "id": 164, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": 
true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_chunk_store_index_entries_per_chunk_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le))", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ le }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Store - Index Entries Per Chunk", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + }, + "links": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 67 + }, + "id": 159, + "maxDataPoints": 50, + "options": { + "calculate": false, + "cellGap": 2, + "cellValues": { + "unit": "cps" + }, + "color": { + "exponent": 0.5, + "fill": "red", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "RdBu", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 0 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisLabel": "Bucket Entries", + "axisPlacement": "left", + "reverse": false, + "unit": "none" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(loki_chunk_store_index_lookups_per_query_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])", + "format": "heatmap", + "instant": false, + "interval": "", + "legendFormat": "{{ le }}", + "range": true, + "refId": "A" + } + ], + 
"title": "Loki Store - Index Lookups Per Query Distribution", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "+Inf": "Unlimited", + "0.00010239999999999998": "10e-5", + "0.0010485759999999998": "10e-4", + "0.010485759999999998": "10e-3", + "0.10485759999999998": "10e-2", + "1.0239999999999999e-05": "10e-6", + "1.0239999999999999e-06": "10e-7", + "13568.999999999998": "13569", + "144.99999999999997": "145", + "1536.9999999999998": "1537", + "24.999999999999996": "25", + "27264.999999999996": "27265", + "320.99999999999994": "321", + "3200.9999999999995": "3201", + "64.99999999999999": "65", + "6528.999999999999": "6529", + "704.9999999999999": "705", + "8.999999999999998": "9", + "9.999999999999998e-08": "10e-8", + "9.999999999999999e-09": "10e-9", + "9.999999999999999e-10": "10e-10" + } + } + } + ], + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Bucket Entries", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 67 + }, + "id": 160, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": 
"table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_chunk_store_index_lookups_per_query_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le))", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ le }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Store - Index Lookups Per Query", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + }, + "links": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 75 + }, + "id": 157, + "maxDataPoints": 50, + "options": { + "calculate": false, + "cellGap": 2, + "cellValues": { + "unit": "cps" + }, + "color": { + "exponent": 0.5, + "fill": "red", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "RdBu", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 0 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisLabel": "Bucket Entries", + "axisPlacement": "left", + "reverse": false, + "unit": "none" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(loki_chunk_store_series_pre_intersection_per_query_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])", + "format": "heatmap", + "instant": false, + "interval": "", + 
"legendFormat": "{{ le }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Store - Series (Pre Intersection) Per Chunk Distribution", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "+Inf": "Unlimited", + "0.00010239999999999998": "10e-5", + "0.0010485759999999998": "10e-4", + "0.010485759999999998": "10e-3", + "0.10485759999999998": "10e-2", + "1.0239999999999999e-05": "10e-6", + "1.0239999999999999e-06": "10e-7", + "13568.999999999998": "13569", + "144.99999999999997": "145", + "1536.9999999999998": "1537", + "24.999999999999996": "25", + "27264.999999999996": "27265", + "320.99999999999994": "321", + "3200.9999999999995": "3201", + "64.99999999999999": "65", + "6528.999999999999": "6529", + "704.9999999999999": "705", + "8.999999999999998": "9", + "9.999999999999998e-08": "10e-8", + "9.999999999999999e-09": "10e-9", + "9.999999999999999e-10": "10e-10" + } + } + } + ], + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Bucket Entries", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 75 + }, + "id": 158, + "options": { + 
"legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_chunk_store_series_pre_intersection_per_query_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le))", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ le }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Store - Series (Pre Intersection) Per Chunk", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + }, + "links": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 83 + }, + "id": 161, + "maxDataPoints": 50, + "options": { + "calculate": false, + "cellGap": 2, + "cellValues": { + "unit": "cps" + }, + "color": { + "exponent": 0.5, + "fill": "red", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "RdBu", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 0 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisLabel": "Bucket Entries", + "axisPlacement": "left", + "reverse": false, + "unit": "none" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(loki_chunk_store_series_post_intersection_per_query_bucket{job=~\"$job\", 
instance=~\"$instance\"}[$__rate_interval])", + "format": "heatmap", + "instant": false, + "interval": "", + "legendFormat": "{{ le }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Store - Series (Post Intersection) Per Chunk Distribution", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "+Inf": "Unlimited", + "0.00010239999999999998": "10e-5", + "0.0010485759999999998": "10e-4", + "0.010485759999999998": "10e-3", + "0.10485759999999998": "10e-2", + "1.0239999999999999e-05": "10e-6", + "1.0239999999999999e-06": "10e-7", + "13568.999999999998": "13569", + "144.99999999999997": "145", + "1536.9999999999998": "1537", + "24.999999999999996": "25", + "27264.999999999996": "27265", + "320.99999999999994": "321", + "3200.9999999999995": "3201", + "64.99999999999999": "65", + "6528.999999999999": "6529", + "704.9999999999999": "705", + "8.999999999999998": "9", + "9.999999999999998e-08": "10e-8", + "9.999999999999999e-09": "10e-9", + "9.999999999999999e-10": "10e-10" + } + } + } + ], + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Bucket Entries", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "cps" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 83 + }, + "id": 162, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_chunk_store_series_post_intersection_per_query_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le))", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ le }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Store - Series (Post Intersection) Per Chunk", + "type": "timeseries" + } + ], + "title": "Loki Store Metrics", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 69 + }, + "id": 170, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Errors", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 12 + }, + "id": 171, + "options": { + "legend": { + 
"calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "loki_table_manager_create_failures{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Table create failures", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "loki_table_manager_delete_failures{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Table delete failures", + "range": true, + "refId": "B" + } + ], + "title": "Loki Table Manager - Errors", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*)fake$", + "renamePattern": "$1Default Tenant" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Time", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 
12, + "y": 12 + }, + "id": 172, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_table_manager_sync_duration_seconds_bucket{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (le,operation,status_code))", + "hide": false, + "interval": "$interval", + "legendFormat": "{{ operation }} / {{ status_code }}", + "range": true, + "refId": "A" + } + ], + "title": "Loki Table Manager - Sync Duration", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*)\\.(.*)", + "renamePattern": "$1 $2" + } + } + ], + "type": "timeseries" + } + ], + "title": "Loki Table Manager Metrics", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 70 + }, + "id": 83, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Count", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + 
"gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 13 + }, + "id": 168, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "go_goroutines{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Goroutines", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "go_threads{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Threads", + "range": true, + "refId": "B" + } + ], + "title": "Go - Goroutines / Threads", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*)fake$", + "renamePattern": "$1Default Tenant" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Avg. 
Latency", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 13 + }, + "id": 91, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(go_sched_latencies_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])/rate(go_sched_latencies_seconds_count{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Average Goroutine latency", + "range": true, + "refId": "A" + } + ], + "title": "Go - Goroutines Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + }, + "links": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 21 + }, + "id": 94, + "maxDataPoints": 50, + "options": { + "calculate": false, + "cellGap": 2, + "cellValues": { + "unit": "cps" + }, + "color": { + 
"exponent": 0.5, + "fill": "red", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "RdBu", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisLabel": "Bucket Latency", + "axisPlacement": "left", + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(go_sched_latencies_seconds_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])", + "format": "heatmap", + "instant": false, + "interval": "", + "legendFormat": "{{ le }}", + "range": true, + "refId": "A" + } + ], + "title": "Go - Goroutines Latency Distribution", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "+Inf": "Unlimited", + "0.00010239999999999998": "10e-5", + "0.0010485759999999998": "10e-4", + "0.010485759999999998": "10e-3", + "0.10485759999999998": "10e-2", + "1.0239999999999999e-05": "10e-6", + "1.0239999999999999e-06": "10e-7", + "13568.999999999998": "13569", + "144.99999999999997": "145", + "1536.9999999999998": "1537", + "24.999999999999996": "25", + "27264.999999999996": "27265", + "320.99999999999994": "321", + "3200.9999999999995": "3201", + "64.99999999999999": "65", + "6528.999999999999": "6529", + "704.9999999999999": "705", + "8.999999999999998": "9", + "9.999999999999998e-08": "10e-8", + "9.999999999999999e-09": "10e-9", + "9.999999999999999e-10": "10e-10" + } + } + } + ], + "type": "heatmap" + } + ], + "title": "Go General Info", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 71 + }, + "id": 104, + "panels": [ + { + "datasource": { + 
"type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 14 + }, + "id": 92, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "go_memory_classes_heap_free_bytes{job=~\"$job\",instance=~\"$instance\"}", + "interval": "$interval", + "legendFormat": "Free memory", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "go_memory_classes_heap_objects_bytes{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Allocated objects memory", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": 
"go_memory_classes_heap_released_bytes{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Released memory", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "go_memory_classes_heap_stacks_bytes{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Stack reserved memory", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "go_memory_classes_heap_unused_bytes{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Reserved unused memory", + "range": true, + "refId": "E" + } + ], + "title": "Go - Memory Heap", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 14 + }, + "id": 93, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + 
], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "go_memory_classes_metadata_mcache_free_bytes{job=~\"$job\",instance=~\"$instance\"}", + "interval": "$interval", + "legendFormat": "Free mcache memory", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "go_memory_classes_metadata_mcache_inuse_bytes{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Used mcache memory", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "go_memory_classes_metadata_mspan_free_bytes{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Free mspan memory", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "go_memory_classes_metadata_mspan_inuse_bytes{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Used mspan memory", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "go_memory_classes_metadata_other_bytes{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Used other metadata memory", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "go_memory_classes_other_bytes{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + 
"interval": "$interval", + "legendFormat": "Used other memory", + "range": true, + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "go_memory_classes_profiling_buckets_bytes{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Used profiling bucket memory", + "range": true, + "refId": "G" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "go_memory_classes_total_bytes{job=~\"$job\",instance=~\"$instance\"}", + "hide": true, + "interval": "$interval", + "legendFormat": "Used total memory", + "range": true, + "refId": "H" + } + ], + "title": "Go - Memory Metadata", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Avg. 
Object Size", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 22 + }, + "id": 95, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(go_gc_heap_allocs_bytes_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])/rate(go_gc_heap_allocs_objects_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Average object size allocated to the heap by the application", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(go_gc_heap_frees_bytes_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])/rate(go_gc_heap_frees_objects_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "instant": false, + "interval": "$interval", + "legendFormat": "Average object size freed from the heap by the GC", + "range": true, + "refId": "B" + } + ], + "title": "Go - Heap Object Allocation / Free Sizes", + 
"type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + }, + "links": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 22 + }, + "id": 89, + "maxDataPoints": 50, + "options": { + "calculate": false, + "cellGap": 2, + "cellValues": { + "unit": "cps" + }, + "color": { + "exponent": 0.5, + "fill": "red", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "PuBuGn", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisLabel": "Bucket Size", + "axisPlacement": "left", + "reverse": false, + "unit": "bytes" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(go_gc_heap_frees_by_size_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])", + "format": "heatmap", + "instant": false, + "interval": "", + "legendFormat": "{{ le }}", + "range": true, + "refId": "A" + } + ], + "title": "Go - Heap Free Distribution", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "+Inf": "Unlimited", + "13568.999999999998": "13569", + "144.99999999999997": "145", + "1536.9999999999998": "1537", + "24.999999999999996": "25", + "27264.999999999996": "27265", + "320.99999999999994": "321", + "3200.9999999999995": "3201", + "64.99999999999999": "65", + "6528.999999999999": "6529", + "704.9999999999999": "705", + "8.999999999999998": "9" + } + 
} + } + ], + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + }, + "links": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 30 + }, + "id": 88, + "maxDataPoints": 50, + "options": { + "calculate": false, + "cellGap": 2, + "cellValues": { + "unit": "cps" + }, + "color": { + "exponent": 0.5, + "fill": "red", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "RdBu", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisLabel": "Bucket Size", + "axisPlacement": "left", + "reverse": false, + "unit": "bytes" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(go_gc_heap_allocs_by_size_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])", + "format": "heatmap", + "instant": false, + "interval": "", + "legendFormat": "{{ le }}", + "range": true, + "refId": "A" + } + ], + "title": "Go - Heap Allocation Distribution", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "+Inf": "Unlimited", + "13568.999999999998": "13569", + "144.99999999999997": "145", + "1536.9999999999998": "1537", + "24.999999999999996": "25", + "27264.999999999996": "27265", + "320.99999999999994": "321", + "3200.9999999999995": "3201", + "64.99999999999999": "65", + "6528.999999999999": "6529", + "704.9999999999999": "705", + 
"8.999999999999998": "9" + } + } + } + ], + "type": "heatmap" + } + ], + "title": "Go Memory Metrics", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 72 + }, + "id": 106, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Cycles", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 84, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(go_gc_cycles_total_gc_cycles_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Count of all completed GC cycles", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": 
"rate(go_gc_cycles_automatic_gc_cycles_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Count of completed GC cycles generated by the Go runtime", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(go_gc_cycles_forced_gc_cycles_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "interval": "$interval", + "legendFormat": "Count of completed GC cycles forced by the application.", + "range": true, + "refId": "C" + } + ], + "title": "Go - Garbage Collection Cycles", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Avg. Pause Duration", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 85, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(go_gc_duration_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])/rate(go_gc_duration_seconds_count{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Average GC cycle pause duration", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(go_gc_pauses_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])/rate(go_gc_pauses_seconds_count{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Average GC cycle halt duration", + "range": true, + "refId": "B" + } + ], + "title": "Go - Garbage Collection Pause Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + }, + "links": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 23 + }, + "id": 90, + "maxDataPoints": 50, + "options": { + "calculate": false, + "cellGap": 2, + "cellValues": { + "unit": "cps" + }, + "color": { + "exponent": 0.5, + "fill": "red", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "BrBG", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisLabel": "Bucket Duration", + "axisPlacement": "left", + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "datasource": { + "type": 
"prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(go_gc_pauses_seconds_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])", + "format": "heatmap", + "instant": false, + "interval": "", + "legendFormat": "{{ le }}", + "range": true, + "refId": "A" + } + ], + "title": "Go - Garbage Collection Pauses Distribution", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "+Inf": "Unlimited", + "0.00010239999999999998": "10e-5", + "0.0010485759999999998": "10e-4", + "0.010485759999999998": "10e-3", + "0.10485759999999998": "10e-2", + "1.0239999999999999e-05": "10e-6", + "1.0239999999999999e-06": "10e-7", + "9.999999999999998e-08": "10e-8", + "9.999999999999999e-09": "10e-9", + "9.999999999999999e-10": "10e-10" + } + } + } + ], + "type": "heatmap" + } + ], + "title": "Go GC Metrics", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 73 + }, + "id": 75, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Seconds", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": 
[] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 74 + }, + "id": 78, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "rate(process_cpu_seconds_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "interval": "$interval", + "legendFormat": "Total user and system CPU time spent in seconds", + "range": true, + "refId": "A" + } + ], + "title": "Process - CPU seconds", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 74 + }, + "id": 79, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { 
+ "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "process_virtual_memory_bytes{job=~\"$job\",instance=~\"$instance\"}", + "interval": "$interval", + "legendFormat": "Virtual memory size", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "process_resident_memory_bytes{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Resident memory size", + "range": true, + "refId": "B" + } + ], + "title": "Process - Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Descriptors", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 82 + }, + "id": 77, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "process_max_fds{job=~\"$job\",instance=~\"$instance\"}", + "hide": false, + "interval": "$interval", + "legendFormat": "Maximum number of open file descriptors", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "process_open_fds{job=~\"$job\",instance=~\"$instance\"}", + "interval": "$interval", + "legendFormat": "Number of open file descriptors", + "range": true, + "refId": "B" + } + ], + "title": "Process - File descriptors", + "type": "timeseries" + } + ], + "title": "Process Info", + "type": "row" + } + ], + "preload": false, + "refresh": "1m", + "schemaVersion": 40, + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus (Local)", + "value": "localprometheus" + }, + "includeAll": false, + "label": "Prometheus", + "name": "datasource_prometheus", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "current": { + "text": "All", + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "definition": "label_values(loki_build_info,job)", + "includeAll": true, + "label": "Job", + "multi": true, + "name": "job", + "options": [], + "query": { + "query": "label_values(loki_build_info,job)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "sort": 1, + "type": "query" + }, + { + "current": { + "text": [ + "infrahub-loki:3100" + ], + "value": [ + "infrahub-loki:3100" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "definition": "label_values(up{job=~\"$job\"}, instance)", + "includeAll": true, + "label": "Instance", + "multi": true, + "name": "instance", + "options": [], + "query": { + "query": "label_values(up{job=~\"$job\"}, instance)", + "refId": 
"PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "sort": 3, + "type": "query" + }, + { + "auto": true, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "label": "Interval", + "name": "interval", + "options": [ + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + } + ], + "query": "30s,1m,5m,1h,6h,1d", + "refresh": 2, + "type": "interval" + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Loki Monitoring", + "uid": "Kn5xm-gZ4", + "version": 3, + "weekStart": "" +} \ No newline at end of file diff --git a/charts/infrahub-observability/dashboards/neo4j_monitoring.json b/charts/infrahub-observability/dashboards/neo4j_monitoring.json new file mode 100644 index 0000000..2029f7c --- /dev/null +++ b/charts/infrahub-observability/dashboards/neo4j_monitoring.json @@ -0,0 +1,1972 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 6, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] 
+ }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 5, + "x": 0, + "y": 0 + }, + "id": 2, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "expr": "neo4j_dbms_page_cache_hit_ratio", + "refId": "A" + } + ], + "title": "Page Cache Hit Ratio", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "localprometheus" + }, + "description": "Boolean status of the Neo4j server", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + }, + { + "options": { + "1": { + "text": "UP" + } + }, + "type": "value" + }, + { + "options": { + "0": { + "text": "DOWN" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "#7eb26d", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 5, + "x": 5, + "y": 0 + }, + "id": 18, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "localprometheus" + }, + "editorMode": "code", + "expr": "up{job='database', instance=~'database:2004'}", + "format": 
"time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "__auto", + "refId": "A" + } + ], + "title": "Neo4j status", + "transparent": true, + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "localprometheus" + }, + "description": "Instant rate", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "rgb(31, 120, 193)", + "mode": "fixed" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 8, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 2 + }, + { + "color": "#299c46", + "value": 4 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 5, + "x": 10, + "y": 0 + }, + "id": 21, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "localprometheus" + }, + "editorMode": "code", + "expr": "irate(neo4j_database_neo4j_transaction_committed_total{}[$interval])", + "format": "time_series", + "intervalFactor": 1, + "range": true, + "refId": "A" + } + ], + "title": "Committed Transaction Speed", + "transparent": true, + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "localprometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": 
"red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 5, + "x": 15, + "y": 0 + }, + "id": 20, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "localprometheus" + }, + "editorMode": "code", + "expr": "neo4j_database_neo4j_transaction_last_committed_tx_id_total", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Last Committed Transaction ID", + "range": true, + "refId": "A" + } + ], + "title": "Last Committed Write Transaction ID", + "transparent": true, + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "localprometheus" + }, + "description": "Instant rate", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "rgb(31, 120, 193)", + "mode": "fixed" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 8, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#73BF69", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 2 + }, + { + "color": "#d44a3a", + "value": 4 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 20, + "y": 0 + }, + "id": 22, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + 
}, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "localprometheus" + }, + "editorMode": "code", + "expr": "irate(neo4j_database_neo4j_transaction_rollbacks_total{job=\"database\"}[$interval])", + "format": "time_series", + "intervalFactor": 1, + "range": true, + "refId": "A" + } + ], + "title": "Rolled back Transaction Speed", + "transparent": true, + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 1, + "panels": [], + "title": "System Overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "localprometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 23, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "localprometheus" + }, + "editorMode": "code", + "exemplar": 
false, + "expr": "container_memory_usage_bytes{container=~\"neo4j|infrahub-server\"}", + "instant": false, + "legendFormat": "{{container}}", + "range": true, + "refId": "A" + } + ], + "title": "Memory usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "neo4j_dbms_vm_heap_used", + "range": true, + "refId": "A" + } + ], + "title": "Heap Memory Usage", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "localprometheus" + }, + "description": "All transaction metrics", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + 
"group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 17, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "width": 180 + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "localprometheus" + }, + "editorMode": "code", + "expr": "label_replace({__name__=~\"neo4j_database_neo4j_transaction_(started|committed|rollbacks|terminated)_total\"}, \"label\", \"$1\", \"__name__\", \"neo4j_database_neo4j_transaction_(.+)\")", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{label}}", + "range": true, + "refId": "A" + } + ], + "title": "Transactions", + "transparent": true, + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "localprometheus" + }, + "description": "Total number of graph items: node, relationships, ...", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } 
+ }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 16, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "localprometheus" + }, + "editorMode": "code", + "expr": "label_replace({__name__=~\"neo4j_database_neo4j_ids_in_use_(node|property|relationship)\", job=\"database\", instance=~'database:2004'}, \"label\", \"$1\", \"__name__\", \"neo4j_database_neo4j_ids_in_use_(.+)\")", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 2, + "legendFormat": "# {{label}}", + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "localprometheus" + }, + "editorMode": "code", + "expr": "neo4j_database_neo4j_ids_in_use_relationship_type{job=\"$job\", instance=~'$instance'}", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 2, + "legendFormat": "# relationship", + "refId": "A" + } + ], + "title": "Counters of Graph Items", + "transparent": true, + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 23 + }, + "id": 4, + "panels": [], + "title": "Query Performance", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + 
"fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "expr": "rate(neo4j_database_neo4j_db_query_execution_latency_millis_sum[5m]) / rate(neo4j_database_neo4j_db_query_execution_latency_millis_count[5m])", + "legendFormat": "Average", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "expr": "histogram_quantile(0.95, rate(neo4j_database_neo4j_db_query_execution_latency_millis_bucket[5m]))", + "legendFormat": "p95", + "refId": "B" + } + ], + "title": "Query Execution Time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + 
"insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ + "sum" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "expr": "rate(neo4j_database_neo4j_db_query_execution_success_total[5m])", + "legendFormat": "Success", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "expr": "rate(neo4j_database_neo4j_db_query_execution_failure_total[5m])", + "legendFormat": "Failure", + "refId": "B" + } + ], + "title": "Query Success/Failure Rate", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 7, + "panels": [], + "title": "Query Timing Analysis", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + 
"lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 33 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "expr": "rate(neo4j_database_neo4j_db_query_execution_latency_millis{quantile=\"0.5\"}[5m])", + "legendFormat": "Median (p50)", + "refId": "A" + }, + { + "expr": "rate(neo4j_database_neo4j_db_query_execution_latency_millis{quantile=\"0.75\"}[5m])", + "legendFormat": "p75", + "refId": "B" + }, + { + "expr": "rate(neo4j_database_neo4j_db_query_execution_latency_millis{quantile=\"0.95\"}[5m])", + "legendFormat": "p95", + "refId": "C" + }, + { + "expr": "rate(neo4j_database_neo4j_db_query_execution_latency_millis{quantile=\"0.99\"}[5m])", + "legendFormat": "p99", + "refId": "D" + } + ], + "title": "Query Time Distribution", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "localprometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": -1, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "stepAfter", + "lineStyle": { + "fill": 
"solid" + }, + "lineWidth": 1, + "pointSize": 8, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 33 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "editorMode": "code", + "expr": "increase(infrahub_db_query_execution_seconds_count[5m]) - ignoring(le) increase(infrahub_db_query_execution_seconds_bucket{le=\"0.5\"}[5m]) > 0", + "legendFormat": "{{query}} {{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Number Slow Queries (>0.5s)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "localprometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": 
"absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 41 + }, + "id": 24, + "options": { + "legend": { + "calcs": [ + "max", + "last" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "localprometheus" + }, + "editorMode": "code", + "expr": "rate(infrahub_db_query_execution_seconds_bucket{le!=\"\"}[$interval])", + "instant": false, + "legendFormat": "{{query}} - {{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Query Response Times", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "localprometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 41 + }, + "id": 25, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "localprometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(infrahub_db_query_execution_seconds_bucket{}[5m])) by (le, query))", + "instant": false, + "legendFormat": "P95 - {{query}}", + "range": true, + "refId": "A" + } + ], + "title": "Query Latency Percentiles", + "type": "gauge" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 49 + }, + "id": 10, + "panels": 
[], + "title": "Database Size & Resources", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "yellow", + "value": 70 + }, + { + "color": "red", + "value": 85 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 50 + }, + "id": 11, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "neo4j_database_neo4j_store_size_total", + "range": true, + "refId": "A" + } + ], + "title": "Database Size", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": 
"absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 50 + }, + "id": 12, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "expr": "neo4j_database_neo4j_ids_in_use_node", + "legendFormat": "Nodes", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "expr": "neo4j_database_neo4j_ids_in_use_relationship", + "legendFormat": "Relationships", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "expr": "neo4j_database_neo4j_ids_in_use_property", + "legendFormat": "Properties", + "refId": "C" + } + ], + "title": "Database Elements", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 58 + }, + "id": 13, + "panels": [], + "title": "Transaction Monitoring", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 59 + }, + "id": 14, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "expr": "neo4j_database_neo4j_transaction_active_read", + "legendFormat": "Active Read", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "expr": "neo4j_database_neo4j_transaction_active_write", + "legendFormat": "Active Write", + "refId": "B" + } + ], + "title": "Active Transactions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 59 + }, + "id": 15, 
+ "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "expr": "rate(neo4j_database_neo4j_transaction_committed_total[5m])", + "legendFormat": "Committed", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "expr": "rate(neo4j_database_neo4j_transaction_rollbacks_total[5m])", + "legendFormat": "Rollbacks", + "refId": "B" + } + ], + "title": "Transaction Rate", + "type": "timeseries" + } + ], + "preload": false, + "refresh": "30s", + "schemaVersion": 40, + "tags": [ + "neo4j" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus (Local)", + "value": "localprometheus" + }, + "includeAll": false, + "label": "Datasource", + "name": "datasource_prometheus", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "auto": true, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "10m", + "value": "10m" + }, + "label": "interval", + "name": "interval", + "options": [ + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": true, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "type": 
"interval" + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": {}, + "timezone": "utc", + "title": "Neo4j", + "uid": "neo4j-complete1", + "version": 5, + "weekStart": "" +} \ No newline at end of file diff --git a/charts/infrahub-observability/dashboards/prefect_flow_run_overview.json b/charts/infrahub-observability/dashboards/prefect_flow_run_overview.json new file mode 100644 index 0000000..f1a2626 --- /dev/null +++ b/charts/infrahub-observability/dashboards/prefect_flow_run_overview.json @@ -0,0 +1,2150 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Dashboard for Prefect Exporter chart", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 45, + "title": "Overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 1 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(prefect_flows_total)", + "instant": false, + 
"legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Flows total", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 3, + "y": 1 + }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(prefect_deployments_total)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Deployments total", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 6, + "y": 1 + }, + "id": 14, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": 
"sum(prefect_info_deployment{is_schedule_active=\"False\"} == 1)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Deployments paused", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 9, + "y": 1 + }, + "id": 49, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(prefect_info_flow_runs == 1)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Flow Runs running", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 48, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + 
"type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "count(prefect_info_flow_runs)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Flow Runs total", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 1 + }, + "id": 60, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "avg(prefect_flow_runs_total_run_time_total)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Flow Runs average run time", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "decimals": 0, + "mappings": [], + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Running" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + 
"matcher": { + "id": "byName", + "options": "Completed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cancelled" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 47, + "options": { + "legend": { + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.1.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "count by (state_name) (prefect_info_flow_runs)", + "instant": false, + "legendFormat": "{{label_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Flow Runs by state", + "type": "piechart" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 10, + "panels": [], + "title": "Flows Runs: $flow_name (last 24h)", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "Last 24h", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 0, + "y": 7 + }, + "id": 51, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": 
"10.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "count(prefect_info_flow_runs{flow_name=~\"$flow_name\"})", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Flow Runs total", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "created" + }, + "properties": [ + { + "id": "unit", + "value": "dateTimeAsIso" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "name" + }, + "properties": [ + { + "id": "custom.width", + "value": 201 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "spent time" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "state" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "mode": "basic", + "type": "color-background" + } + }, + { + "id": "mappings", + "value": [ + { + "options": { + "Cancelled": { + "color": "orange", + "index": 2 + }, + "Completed": { + "color": "green", + "index": 0 + }, + "Crashed": { + "color": "red", + "index": 3 + }, + "Failed": { + "color": "red", + "index": 1 + }, + "Running": { + "color": "blue", + "index": 4 + } + }, + "type": "value" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "start_time" + }, + "properties": [ + { + "id": "unit", + "value": "dateTimeAsIso" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "end_time" + }, + "properties": [ + { + "id": 
"unit", + "value": "dateTimeAsIso" + }, + { + "id": "mappings", + "value": [ + { + "options": { + "match": "nan", + "result": { + "index": 0, + "text": "-" + } + }, + "type": "special" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "runs" + }, + "properties": [ + { + "id": "custom.width", + "value": 85 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "flow_run_name" + }, + "properties": [ + { + "id": "custom.width", + "value": 280 + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 21, + "x": 3, + "y": 7 + }, + "hideTimeOverride": true, + "id": 50, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": true, + "fields": [], + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 0, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "prefect_info_flow_runs{state_name=~\"$flow_run_state_name\", flow_name=~\"$flow_name\"}", + "format": "table", + "instant": true, + "legendFormat": "", + "range": false, + "refId": "A" + } + ], + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "__name__": true, + "container": true, + "created": false, + "deployment_id": true, + "deployment_name": true, + "endpoint": true, + "flow_id": true, + "flow_name": true, + "flow_run_id": true, + "instance": true, + "job": true, + "name": false, + "": true, + "pod": true, + "service": true, + "start_time": false, + "state_id": true, + "updated": true, + "work_queue_name": true + }, + "indexByName": { + "Time": 8, + "Value": 22, + "__name__": 9, + "container": 10, + "created": 18, + "deployment_id": 6, + "deployment_name": 7, + "end_time": 20, + "endpoint": 11, + "flow_id": 4, + "flow_name": 5, + "flow_run_id": 1, + "flow_run_name": 0, + "instance": 12, + "job": 13, + 
"": 14, + "pod": 15, + "prefect_info_flow_runs": 23, + "run_count": 2, + "service": 16, + "start_time": 19, + "state_id": 21, + "total_run_time": 3, + "work_queue_name": 17 + }, + "renameByName": { + "is_schedule_active": "scheduled", + "job": "", + "prefect_info_flow_runs": "state", + "run_count": "runs", + "start_time": "", + "state_name": "state", + "total_run_time": "spent time" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "Last 24h", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 0, + "y": 11 + }, + "id": 56, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "max(max(max_over_time(node__pod_container:container_cpu_usage_seconds_total:sum_irate{=\"$\", pod=~\".*$flow_name.*\"}[$__range])) by (pod))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "CPU usage peak", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "Last 24h", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 0, + "y": 15 + }, + "id": 55, + 
"options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "max(max(max_over_time(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", =\"$\", pod=~\".*$flow_name.*\", container!=\"\", image!=\"\"}[$__range])) by (pod))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "MEM usage peak", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 9, + "panels": [], + "title": "Flows", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "Last 24h", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 3, + "x": 0, + "y": 20 + }, + "id": 58, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "avg by (flow_name) (prefect_flow_runs_total_run_time_total{flow_name=~\"$flow_name\"})", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Average time", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + 
"uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 21, + "x": 3, + "y": 20 + }, + "id": 61, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "avg by (flow_name) (prefect_flow_runs_total_run_time_total{flow_name=~\"$flow_name\"})", + "instant": false, + "legendFormat": "{{flow_run_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Flow Run total run time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 
null + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "created" + }, + "properties": [ + { + "id": "unit", + "value": "dateTimeAsIso" + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 28 + }, + "id": 12, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": true, + "fields": [], + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 0, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "scheduled" + } + ] + }, + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "prefect_info_flows{flow_name=~\"$flow_name\"} == 1", + "format": "table", + "instant": true, + "legendFormat": "", + "range": false, + "refId": "A" + } + ], + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "__name__": true, + "container": true, + "endpoint": true, + "instance": true, + "job": true, + "": true, + "pod": true, + "service": true + }, + "indexByName": { + "Time": 2, + "Value": 12, + "__name__": 3, + "container": 4, + "created": 5, + "endpoint": 6, + "flow_id": 1, + "flow_name": 0, + "instance": 7, + "job": 8, + "": 9, + "pod": 10, + "service": 11 + }, + "renameByName": {} + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 0, + "pointSize": 
5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "requests" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.transform", + "value": "constant" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limits" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.transform", + "value": "constant" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 13, + "x": 0, + "y": 38 + }, + "id": 53, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.1.1", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "avg(sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=~\".*$flow_name.*\"}) by (pod))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "usage", + "range": true, + "refId": "A" + }, + { + "datasource": { + "uid": "$datasource" 
+ }, + "editorMode": "code", + "expr": "kube_pod_container_resource_requests{job=\"kube-state-metrics\", namespace=\"$namespace\", pod=~\".*$flow_name.*\", resource=\"cpu\"}\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "requests", + "range": true, + "refId": "B" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "kube_pod_container_resource_limits{job=\"kube-state-metrics\", namespace=\"$namespace\", pod=~\".*$flow_name.*\", resource=\"cpu\"}\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "limits", + "range": true, + "refId": "C" + } + ], + "title": "CPU Usage $flow_name", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 0, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "requests" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + 
"value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.transform", + "value": "constant" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limits" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.transform", + "value": "constant" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 11, + "x": 13, + "y": 38 + }, + "id": 54, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.1.1", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "avg(sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=~\".*$flow_name.*\", container!=\"\", image!=\"\"}) by (pod))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "usage", + "range": true, + "refId": "A" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "kube_pod_container_resource_requests{job=\"kube-state-metrics\", namespace=\"$namespace\", pod=~\".*$flow_name.*\", resource=\"memory\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "requests", + "range": true, + "refId": "B" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "kube_pod_container_resource_limits{job=\"kube-state-metrics\", namespace=\"$namespace\", pod=~\".*$flow_name.*\", resource=\"memory\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "limits", + "range": true, + "refId": "C" + } + ], + "title": "Memory Usage (WSS)", + "type": 
"timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "${datasource_loki}" + }, + "description": "Include all flow_runs", + "gridPos": { + "h": 18, + "w": 24, + "x": 0, + "y": 45 + }, + "id": 46, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${datasource_loki}" + }, + "editorMode": "code", + "expr": "{namespace=\"$namespace\", pod=~\".*$flow_name.*\"} |= \"$search\"", + "queryType": "range", + "refId": "A" + } + ], + "title": "Flow logs: flow-$flow_name", + "type": "logs" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 63 + }, + "id": 8, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "is_schedule_active" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "mode": "basic", + "type": "color-background" + } + }, + { + "id": "mappings", + "value": [ + { + "options": { + "False": { + "color": "red", + "index": 1, + "text": "Disable" + }, + "True": { + "color": "green", + "index": 0, + "text": "Enable" + } + }, + "type": "value" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "created" + }, + "properties": [ + { + "id": "unit", + "value": "dateTimeAsIso" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "updated" + }, + "properties": [ + { + "id": "unit", + "value": 
"dateTimeAsIso" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "name" + }, + "properties": [ + { + "id": "custom.width", + "value": 233 + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 11, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": true, + "fields": [], + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 0, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "scheduled" + } + ] + }, + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "prefect_info_deployment{flow_name=~\"$flow_name\"}", + "format": "table", + "instant": true, + "legendFormat": "", + "range": false, + "refId": "A" + } + ], + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "__name__": true, + "container": true, + "created": false, + "endpoint": true, + "instance": true, + "job": true, + "name": false, + "namespace": true, + "pod": true, + "service": true, + "work_pool_name": true, + "work_queue_name": true + }, + "indexByName": { + "Time": 7, + "Value": 18, + "__name__": 8, + "container": 9, + "created": 5, + "deployment_id": 1, + "deployment_name": 0, + "endpoint": 10, + "flow_id": 4, + "flow_name": 3, + "instance": 11, + "is_schedule_active": 2, + "job": 12, + "namespace": 13, + "path": 6, + "pod": 14, + "service": 15, + "work_pool_name": 16, + "work_queue_name": 17 + }, + "renameByName": { + "flow_id": "", + "id": "", + "is_schedule_active": "scheduled", + "job": "" + } + } + } + ], + "type": "table" + } + ], + "title": "Deployments", + "type": "row" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "style": "dark", + "tags": [ + "prefect", + "infrastructure", + "chart", + "monitoring", + "flow-runs" + ], + "templating": { + "list": [ + { + "current": 
{}, + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "definition": "label_values(namespace)", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(namespace)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "definition": "label_values(prefect_info_flows,flow_name)", + "hide": 0, + "includeAll": false, + "label": "Flow Name", + "multi": false, + "name": "flow_name", + "options": [], + "query": { + "query": "label_values(prefect_info_flows,flow_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "definition": "label_values(prefect_info_flow_runs{flow_name=\"$flow_name\"},flow_run_name)", + "hide": 0, + "includeAll": false, + "label": "Flow Run Name", + "multi": false, + "name": "flow_run_name", + "options": [], + "query": { + "query": "label_values(prefect_info_flow_runs{flow_name=\"$flow_name\"},flow_run_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "definition": "label_values(prefect_info_flow_runs,state_name)", + "hide": 0, + "includeAll": true, + "label": "Flow Run State", + "multi": false, + "name": "flow_run_state_name", + "options": [], + "query": { + "query": "label_values(prefect_info_flow_runs,state_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + 
"regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "", + "value": "" + }, + "hide": 0, + "label": "Search", + "name": "search", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "skipUrlSync": false, + "type": "textbox" + }, + { + "current": { + "selected": false, + "text": "Local Prometheus", + "value": "Local Prometheus" + }, + "hide": 2, + "includeAll": false, + "label": "Prometheus", + "multi": false, + "name": "datasource_prometheus", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + } + ] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": { + "hidden": false + }, + "timezone": "browser", + "title": "Prefect / Flow Runs Overview", + "uid": "prefect-flow-runs-overview", + "version": 4, + "weekStart": "monday" + } \ No newline at end of file diff --git a/charts/infrahub-observability/dashboards/prefect_platform_overview.json b/charts/infrahub-observability/dashboards/prefect_platform_overview.json new file mode 100644 index 0000000..facff2d --- /dev/null +++ b/charts/infrahub-observability/dashboards/prefect_platform_overview.json @@ -0,0 +1,4098 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Dashboard for Prefect Exporter chart", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 45, + "title": "Overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(prefect_flows_total)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Flows total", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 52, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "count(prefect_info_flow_runs)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Flow Runs total", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": 
{ + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 20, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(prefect_info_flow_runs == 1)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Flow Runs running", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 50, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(prefect_work_pools_total)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Work Pools total", + "type": "stat" + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "decimals": 0, + "mappings": [], + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Running" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cancelled" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Completed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "id": 21, + "options": { + "legend": { + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.1.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "count by (state_name) (prefect_info_flow_runs)", + "instant": false, + "legendFormat": "{{label_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Flow Runs by state", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + 
"thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 6 + }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(prefect_deployments_total)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Deployments total", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 6 + }, + "id": 14, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "count(prefect_info_deployment{is_schedule_active=\"False\"} == 1)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Deployments paused", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 6 + }, + "id": 49, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "avg(prefect_flow_runs_total_run_time_total)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Flow Runs average run time", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 6 + }, + "id": 51, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(prefect_work_queues_total)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Work Queues total", + "type": "stat" + 
}, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 22, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "index": 0, + "text": "DOWN" + }, + "1": { + "index": 1, + "text": "UP" + }, + "null": { + "index": 2, + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 1 + }, + { + "color": "dark-green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 2, + "x": 0, + "y": 12 + }, + "id": 26, + "options": { + "colorMode": "background_solid", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(probe_success{instance=~\".*prefect.*health\"})", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Status", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 2, + "x": 2, + "y": 12 + }, + "id": 15, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + 
"lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(kube_pod_info{namespace=\"$namespace\", pod=~\"prefect-server.*\"})", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Prefect Server replicas", + "type": "stat" + }, + { + "datasource": { + "type": "loki", + "uid": "${datasource_loki}" + }, + "gridPos": { + "h": 7, + "w": 20, + "x": 4, + "y": 12 + }, + "id": 38, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${datasource_loki}" + }, + "editorMode": "code", + "expr": "{namespace=\"$namespace\", pod=~\"prefect-server.*\"}", + "queryType": "range", + "refId": "A" + } + ], + "title": "Logs", + "type": "logs" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": false, + "inspect": false + }, + "decimals": 2, + "displayName": "", + "mappings": [], + "noValue": "-", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #A" + }, + "properties": [ + { + "id": "displayName", + "value": "CPU Usage" + 
}, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #B" + }, + "properties": [ + { + "id": "displayName", + "value": "CPU Requests" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #C" + }, + "properties": [ + { + "id": "displayName", + "value": "CPU Requests %" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #D" + }, + "properties": [ + { + "id": "displayName", + "value": "CPU Limits" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #E" + }, + "properties": [ + { + "id": "displayName", + "value": "CPU Limits %" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "pod" + }, + "properties": [ + { + "id": "displayName", + "value": "Pod" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "links", + "value": [ + { + "targetBlank": false, + "title": "Drill down", + "url": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell" + } + ] + }, + { + "id": "custom.align" + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 19 + }, + "id": 25, + "interval": "1m", + "links": [], + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + 
"sortBy": [ + { + "desc": true, + "displayName": "CPU Usage" + } + ] + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=~\"prefect-server.*\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"prefect-server.*\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=~\"prefect-server.*\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"prefect-server.*\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"prefect-server.*\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=~\"prefect-server.*\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"prefect-server.*\"}) by (pod)", + "format": "table", + "instant": true, + 
"intervalFactor": 2, + "legendFormat": "", + "refId": "E" + } + ], + "title": "CPU Quota", + "transformations": [ + { + "id": "merge", + "options": { + "reducers": [] + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": {}, + "renameByName": {} + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 0, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "quota - requests" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2495C", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "quota - limits" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF9830", + "mode": "fixed" + } + }, + { + "id": 
"custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 19 + }, + "id": 42, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.1.1", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=~\"prefect-server.*\"}) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": false, + "inspect": false + }, + "decimals": 2, + "displayName": "", + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #A" + }, + "properties": [ + { + "id": "displayName", + "value": "Mem Usage" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": 
"byName", + "options": "Value #B" + }, + "properties": [ + { + "id": "displayName", + "value": "Mem Requests" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #C" + }, + "properties": [ + { + "id": "displayName", + "value": "Mem Requests %" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #D" + }, + "properties": [ + { + "id": "displayName", + "value": "Mem Limits" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #E" + }, + "properties": [ + { + "id": "displayName", + "value": "Mem Limits %" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #F" + }, + "properties": [ + { + "id": "displayName", + "value": "Mem Usage (RSS)" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #G" + }, + "properties": [ + { + "id": "displayName", + "value": "Mem Usage (Cache)" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #H" + }, + "properties": [ + { + "id": "displayName", + "value": "Mem Usage (Swap)" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "pod" + }, + "properties": [ + { + "id": "displayName", + "value": "Pod" + 
}, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "links", + "value": [ + { + "targetBlank": false, + "title": "Drill down", + "url": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell" + } + ] + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Pod" + }, + "properties": [ + { + "id": "custom.width", + "value": 208 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Mem Usage" + }, + "properties": [ + { + "id": "custom.width" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Mem Requests" + }, + "properties": [ + { + "id": "custom.width" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Mem Usage (RSS)" + }, + "properties": [ + { + "id": "custom.width" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Mem Usage (Cache)" + }, + "properties": [ + { + "id": "custom.width", + "value": 162 + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 46, + "interval": "1m", + "links": [], + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "Mem Requests" + } + ] + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=~\"prefect-server.*\", container!=\"\", image!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": 
"sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"prefect-server.*\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=~\"prefect-server.*\", container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"prefect-server.*\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"prefect-server.*\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=~\"prefect-server.*\", container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"prefect-server.*\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=~\"prefect-server.*\", container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F" + }, + { + "datasource": { + "uid": 
"$datasource" + }, + "editorMode": "code", + "expr": "sum(container_memory_cache{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=~\"prefect-server.*\", container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(container_memory_swap{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=~\"prefect-server.*\", container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H" + } + ], + "title": "Memory Quota", + "transformations": [ + { + "id": "merge", + "options": { + "reducers": [] + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": {}, + "renameByName": {} + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 0, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "quota - requests" + }, + 
"properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2495C", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "quota - limits" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF9830", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 25 + }, + "id": 27, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.1.1", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=~\"prefect-server.*\", container!=\"\", image!=\"\"}) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "Memory Usage (w/o cache)", + "type": "timeseries" + } + ], + "title": "Prefect Server overview", + "type": "row" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 34, + "panels": [], + "title": "Prefect Agent overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 13 + }, + "id": 39, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "expr": "sum(kube_deployment_status_replicas{deployment=~\".*agent|.*worker\"})", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Agents/Worker total", + "type": "stat" + }, + { + "datasource": { + "type": "loki", + "uid": "${datasource_loki}" + }, + "gridPos": { + "h": 7, + "w": 20, + "x": 4, + "y": 13 + }, + "id": 40, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${datasource_loki}" + }, + "editorMode": "code", + "expr": "{namespace=\"$namespace\", pod=~\"(prefect-agent.*|prefect-worker.*)\"}", + "queryType": "range", + "refId": "A" + } + ], + "title": "Logs", + "type": "logs" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": false, + "inspect": false + }, + "decimals": 2, + "displayName": 
"", + "mappings": [], + "noValue": "-", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #A" + }, + "properties": [ + { + "id": "displayName", + "value": "CPU Usage" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #B" + }, + "properties": [ + { + "id": "displayName", + "value": "CPU Requests" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #C" + }, + "properties": [ + { + "id": "displayName", + "value": "CPU Requests %" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #D" + }, + "properties": [ + { + "id": "displayName", + "value": "CPU Limits" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #E" + }, + "properties": [ + { + "id": "displayName", + "value": "CPU Limits %" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "pod" + }, + "properties": [ + { + "id": "displayName", + "value": "Pod" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "links", + "value": [ + { + 
"targetBlank": false, + "title": "Drill down", + "url": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell" + } + ] + }, + { + "id": "custom.align" + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 20 + }, + "id": 41, + "interval": "1m", + "links": [], + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "CPU Usage" + } + ] + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=~\"(prefect-agent.*|prefect-worker.*)\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"(prefect-agent.*|prefect-worker.*)\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=~\"(prefect-agent.*|prefect-worker.*)\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"(prefect-agent.*|prefect-worker.*)\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": 
"sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"(prefect-agent.*|prefect-worker.*)\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=~\"(prefect-agent.*|prefect-worker.*)\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"(prefect-agent.*|prefect-worker.*)\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E" + } + ], + "title": "CPU Quota", + "transformations": [ + { + "id": "merge", + "options": { + "reducers": [] + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": {}, + "renameByName": {} + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 0, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": 
"short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "quota - requests" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2495C", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "quota - limits" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF9830", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 20 + }, + "id": 24, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.1.1", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=~\"prefect-agent.*\"}) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": false, + "inspect": false + }, + 
"decimals": 2, + "displayName": "", + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #A" + }, + "properties": [ + { + "id": "displayName", + "value": "Mem Usage" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #B" + }, + "properties": [ + { + "id": "displayName", + "value": "Mem Requests" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #C" + }, + "properties": [ + { + "id": "displayName", + "value": "Mem Requests %" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #D" + }, + "properties": [ + { + "id": "displayName", + "value": "Mem Limits" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #E" + }, + "properties": [ + { + "id": "displayName", + "value": "Mem Limits %" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #F" + }, + "properties": [ + { + "id": "displayName", + "value": "Mem Usage (RSS)" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + }, + { + 
"id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #G" + }, + "properties": [ + { + "id": "displayName", + "value": "Mem Usage (Cache)" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #H" + }, + "properties": [ + { + "id": "displayName", + "value": "Mem Usage (Swap)" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "pod" + }, + "properties": [ + { + "id": "displayName", + "value": "Pod" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "links", + "value": [ + { + "targetBlank": false, + "title": "Drill down", + "url": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell" + } + ] + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Pod" + }, + "properties": [ + { + "id": "custom.width", + "value": 208 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Mem Usage" + }, + "properties": [ + { + "id": "custom.width" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Mem Requests" + }, + "properties": [ + { + "id": "custom.width" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Mem Usage (RSS)" + }, + "properties": [ + { + "id": "custom.width" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Mem Usage (Cache)" + }, + "properties": [ + { + "id": "custom.width", + "value": 162 + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 26 + }, + "id": 43, + "interval": "1m", + "links": [], + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": false, + "fields": "", + 
"reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "Mem Requests" + } + ] + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=~\"(prefect-agent.*|prefect-worker.*)\", container!=\"\", image!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"(prefect-agent.*|prefect-worker.*)\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=~\"(prefect-agent.*|prefect-worker.*)\", container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"(prefect-agent.*|prefect-worker.*)\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"(prefect-agent.*|prefect-worker.*)\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", 
metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=~\"(prefect-agent.*|prefect-worker.*)\", container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"(prefect-agent.*|prefect-worker.*)\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=~\"(prefect-agent.*|prefect-worker.*)\", container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(container_memory_cache{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=~\"(prefect-agent.*|prefect-worker.*)\", container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(container_memory_swap{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=~\"(prefect-agent.*|prefect-worker.*)\", container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H" + } + ], + "title": "Memory Quota", + "transformations": [ + { + "id": "merge", + "options": { + "reducers": [] + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": {}, + "renameByName": {} + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + 
"axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 0, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "quota - requests" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2495C", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "quota - limits" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF9830", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 26 + }, + "id": 44, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + 
"sort": "none" + } + }, + "pluginVersion": "10.1.1", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=~\"prefect-agent.*\", container!=\"\", image!=\"\"}) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "Memory Usage (w/o cache)", + "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 10, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "created" + }, + "properties": [ + { + "id": "unit", + "value": "dateTimeAsIso" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "name" + }, + "properties": [ + { + "id": "custom.width", + "value": 201 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "spent time" + }, + "properties": [ + { + "id": "unit", + "value": "s" + }, + { + "id": "custom.width", + "value": 316 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "state" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "mode": "basic", + "type": "color-background" + } + }, + { + "id": "mappings", + "value": [ + { + "options": { + "Cancelled": { + "color": "orange", + "index": 2 + }, + "Completed": { + "color": "green", + "index": 0 + }, + "Crashed": { + "color": "red", + "index": 3 + }, + "Failed": { + "color": "red", + "index": 1 + 
}, + "Running": { + "color": "blue", + "index": 4 + } + }, + "type": "value" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "start_time" + }, + "properties": [ + { + "id": "unit", + "value": "dateTimeAsIso" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "end_time" + }, + "properties": [ + { + "id": "unit", + "value": "dateTimeAsIso" + }, + { + "id": "mappings", + "value": [ + { + "options": { + "match": "nan", + "result": { + "index": 0, + "text": "-" + } + }, + "type": "special" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "runs" + }, + "properties": [ + { + "id": "custom.width", + "value": 201 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "flow_run_name" + }, + "properties": [ + { + "id": "custom.width", + "value": 447 + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 14 + }, + "hideTimeOverride": true, + "id": 13, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": true, + "fields": [], + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 0, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "end_time" + } + ] + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "prefect_info_flow_runs{state_name=~\"$flow_run_state_name\", flow_name=~\"$flow_name\"}", + "format": "table", + "instant": true, + "legendFormat": "", + "range": false, + "refId": "A" + } + ], + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "__name__": true, + "container": true, + "created": true, + "deployment_name": true, + "endpoint": true, + "flow_id": true, + "flow_name": true, + "flow_run_id": true, + "instance": true, + "job": true, + "name": false, + "namespace": true, + "pod": true, + "service": true, + 
"start_time": false, + "state_id": true, + "updated": true, + "work_queue_name": true + }, + "indexByName": { + "Time": 8, + "Value": 22, + "__name__": 9, + "container": 10, + "created": 18, + "deployment_id": 6, + "deployment_name": 7, + "end_time": 20, + "endpoint": 11, + "flow_id": 4, + "flow_name": 5, + "flow_run_id": 1, + "flow_run_name": 0, + "instance": 12, + "job": 13, + "namespace": 14, + "pod": 15, + "prefect_info_flow_runs": 23, + "run_count": 2, + "service": 16, + "start_time": 19, + "state_id": 21, + "total_run_time": 3, + "work_queue_name": 17 + }, + "renameByName": { + "is_schedule_active": "scheduled", + "job": "", + "prefect_info_flow_runs": "state", + "run_count": "runs", + "start_time": "", + "state_name": "state", + "total_run_time": "spent time" + } + } + } + ], + "type": "table" + } + ], + "title": "Flows Runs (last 24h)", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 9, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "created" + }, + "properties": [ + { + "id": "unit", + "value": "dateTimeAsIso" + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 61 + }, + "id": 12, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": true, + "fields": [], + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 0, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "scheduled" + } + ] + }, + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + 
"type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "prefect_info_flows{flow_name=~\"$flow_name\"} == 1", + "format": "table", + "instant": true, + "legendFormat": "", + "range": false, + "refId": "A" + } + ], + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "__name__": true, + "container": true, + "endpoint": true, + "instance": true, + "job": true, + "namespace": true, + "pod": true, + "service": true + }, + "indexByName": { + "Time": 2, + "Value": 12, + "__name__": 3, + "container": 4, + "created": 5, + "endpoint": 6, + "flow_id": 1, + "flow_name": 0, + "instance": 7, + "job": 8, + "namespace": 9, + "pod": 10, + "service": 11 + }, + "renameByName": {} + } + } + ], + "type": "table" + } + ], + "title": "Flows", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 34 + }, + "id": 8, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "is_schedule_active" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "mode": "basic", + "type": "color-background" + } + }, + { + "id": "mappings", + "value": [ + { + "options": { + "False": { + "color": "red", + "index": 1, + "text": "Disable" + }, + "True": { + "color": "green", + "index": 0, + "text": "Enable" + } + }, + "type": "value" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "created" + }, + "properties": [ + { + "id": "unit", + "value": "dateTimeAsIso" + } + ] + 
}, + { + "matcher": { + "id": "byName", + "options": "updated" + }, + "properties": [ + { + "id": "unit", + "value": "dateTimeAsIso" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "name" + }, + "properties": [ + { + "id": "custom.width", + "value": 233 + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 39 + }, + "id": 11, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": true, + "fields": [], + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 0, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "scheduled" + } + ] + }, + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "prefect_info_deployment{flow_name=~\"$flow_name\"}", + "format": "table", + "instant": true, + "legendFormat": "", + "range": false, + "refId": "A" + } + ], + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "__name__": true, + "container": true, + "created": false, + "endpoint": true, + "instance": true, + "job": true, + "name": false, + "namespace": true, + "pod": true, + "service": true, + "work_pool_name": true, + "work_queue_name": true + }, + "indexByName": { + "Time": 7, + "Value": 18, + "__name__": 8, + "container": 9, + "created": 5, + "deployment_id": 1, + "deployment_name": 0, + "endpoint": 10, + "flow_id": 4, + "flow_name": 3, + "instance": 11, + "is_schedule_active": 2, + "job": 12, + "namespace": 13, + "path": 6, + "pod": 14, + "service": 15, + "work_pool_name": 16, + "work_queue_name": 17 + }, + "renameByName": { + "flow_id": "", + "id": "", + "is_schedule_active": "scheduled", + "job": "" + } + } + } + ], + "type": "table" + } + ], + "title": "Deployments", + "type": "row" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": [ + "prefect", + 
"infrastructure", + "chart", + "monitoring" + ], + "templating": { + "list": [ + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "definition": "label_values(namespace)", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(namespace)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "definition": "label_values(prefect_info_flows,flow_name)", + "hide": 0, + "includeAll": true, + "label": "Flow Name", + "multi": false, + "name": "flow_name", + "options": [], + "query": { + "query": "label_values(prefect_info_flows,flow_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${datasource_prometheus}" + }, + "definition": "label_values(prefect_info_flow_runs,state_name)", + "hide": 0, + "includeAll": true, + "label": "Flow Run State", + "multi": false, + "name": "flow_run_state_name", + "options": [], + "query": { + "query": "label_values(prefect_info_flow_runs,state_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "Local Prometheus", + "value": "Local Prometheus" + }, + "hide": 2, + "includeAll": false, + "label": "Prometheus", + "multi": false, + "name": "datasource_prometheus", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + } + ] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + 
"timepicker": { + "hidden": false + }, + "timezone": "browser", + "title": "Prefect / Platform Overview", + "uid": "prefect-platform-overview", + "version": 5, + "weekStart": "monday" + } \ No newline at end of file diff --git a/charts/infrahub-observability/dashboards/rabbitmq_instance_monitoring.json b/charts/infrahub-observability/dashboards/rabbitmq_instance_monitoring.json new file mode 100644 index 0000000..8f1309f --- /dev/null +++ b/charts/infrahub-observability/dashboards/rabbitmq_instance_monitoring.json @@ -0,0 +1,7989 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 2, + "id": 3, + "links": [ + { + "icon": "doc", + "tags": [], + "targetBlank": true, + "title": "Monitoring with Prometheus & Grafana", + "tooltip": "", + "type": "link", + "url": "https://www.rabbitmq.com/prometheus.html" + } + ], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#37872D", + "value": null + }, + { + "color": "#1F60C4", + "value": 10000 + }, + { + "color": "#C4162A", + "value": 100000 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 64, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": 
"standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": true, + "expr": "sum(rabbitmq_queue_messages_ready{instance=~\"$instance\"} * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Ready messages", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#C4162A", + "value": null + }, + { + "color": "#1F60C4", + "value": -1 + }, + { + "color": "#37872D", + "value": 50 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 62, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": true, + "expr": "sum(rate(rabbitmq_channel_messages_published_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) 
rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"})", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Incoming messages / s", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#C4162A", + "value": null + }, + { + "color": "#1F60C4", + "value": 0 + }, + { + "color": "#37872D", + "value": 10 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 0 + }, + "id": 66, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(rabbitmq_channels{instance=~\"$instance\"} * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) - sum(rabbitmq_channel_consumers * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"})", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Publishers", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + 
"description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#C4162A", + "value": null + }, + { + "color": "#1F60C4", + "value": 0 + }, + { + "color": "#37872D", + "value": 10 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 16, + "y": 0 + }, + "id": 37, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": true, + "expr": "sum(rabbitmq_connections{instance=~\"$instance\"} * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"})", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Connections", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#C4162A", + "value": null + }, + { + "color": "#1F60C4", + "value": 0 + }, + { + "color": "#37872D", + "value": 10 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 20, + "y": 0 + }, + 
"id": 40, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": true, + "expr": "sum(rabbitmq_queues{product=\"$product\",env=\"$env\",instance=~\"$instance\"} * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"})", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Queues", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#37872D", + "value": null + }, + { + "color": "#1F60C4", + "value": 100 + }, + { + "color": "#C4162A", + "value": 500 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 0, + "y": 3 + }, + "id": 65, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"${datasource}" + }, + "exemplar": true, + "expr": "sum(rabbitmq_queue_messages_unacked{instance=~\"$instance\"} * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Unacknowledged messages", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#C4162A", + "value": null + }, + { + "color": "#1F60C4", + "value": -1 + }, + { + "color": "#37872D", + "value": 50 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 6, + "y": 3 + }, + "id": 63, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": true, + "expr": "sum(rate(rabbitmq_channel_messages_redelivered_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) +\nsum(rate(rabbitmq_channel_messages_delivered_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) 
+\nsum(rate(rabbitmq_channel_messages_delivered_ack_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) +\nsum(rate(rabbitmq_channel_get_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) +\nsum(rate(rabbitmq_channel_get_ack_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Outgoing messages / s", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#C4162A", + "value": null + }, + { + "color": "#1F60C4", + "value": 0 + }, + { + "color": "#37872D", + "value": 10 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 3 + }, + "id": 41, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": true, + "expr": "sum(rabbitmq_channel_consumers{instance=~\"$instance\"} * on(instance) 
group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"})", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Consumers", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#C4162A", + "value": null + }, + { + "color": "#1F60C4", + "value": 0 + }, + { + "color": "#37872D", + "value": 10 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 16, + "y": 3 + }, + "id": 38, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": true, + "expr": "sum(rabbitmq_channels{instance=~\"$instance\"} * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"})", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Channels", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": 
"N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#1F60C4", + "value": null + }, + { + "color": "#37872D", + "value": 3 + }, + { + "color": "#C4162A", + "value": 8 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 20, + "y": 3 + }, + "id": 67, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": true, + "expr": "sum(rabbitmq_build_info{instance=~\"$instance\"} * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"})", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Nodes", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 4, + "panels": [], + "title": "Nodes", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 69, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + 
], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "rabbitmq_build_info{instance=~\"$instance\"} * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "job": true + }, + "indexByName": { + "Time": 0, + "Value": 8, + "erlang_version": 3, + "instance": 2, + "job": 7, + "prometheus_client_version": 5, + "prometheus_plugin_version": 6, + "rabbitmq_cluster": 1, + "rabbitmq_version": 4 + }, + "renameByName": { + "erlang_version": "Erlang/OTP", + "instance": "Instance", + "job": "Job", + "prometheus_client_version": "Prometheus Client Version", + "prometheus_plugin_version": "Prometheus Plugin Version", + "rabbitmq_version": "RabbitMQ Version" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "If the value is zero or less, the memory alarm will be triggered and all publishing connections across all cluster nodes will be blocked.\n\nThis value can temporarily go negative because the memory alarm is triggered with a slight delay.\n\nThe kernel's view of the amount of memory used by the node can differ from what the node itself can observe. This means that this value can be negative for a sustained period of time.\n\nBy default nodes use resident set size (RSS) to compute how much memory they use.
This strategy can be changed (see the guides below).\n\n* [Alarms](https://www.rabbitmq.com/alarms.html)\n* [Memory Alarms](https://www.rabbitmq.com/memory.html)\n* [Reasoning About Memory Use](https://www.rabbitmq.com/memory-use.html)\n* [Blocked Connection Notifications](https://www.rabbitmq.com/connection-blocked.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line+area" + } + }, + "decimals": 1, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 0 + }, + { + "color": "transparent", + "value": 536870912 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", 
+ "options": "/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 11, + "x": 0, + "y": 16 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "asc" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "exemplar": true, + "expr": "(rabbitmq_resident_memory_limit_bytes{instance=~\"$instance\"} * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) 
-\n(rabbitmq_process_resident_memory_bytes{instance=~\"$instance\"} * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"})", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Memory available before publishers blocked", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "This metric is reported for the partition where the RabbitMQ data directory is stored.\n\nIf the value is zero or less, the disk alarm will be triggered and all publishing connections across all cluster nodes will be blocked.\n\nThis value can temporarily go negative because the free disk space alarm is triggered with a slight delay.\n\n* [Alarms](https://www.rabbitmq.com/alarms.html)\n* [Disk Space Alarms](https://www.rabbitmq.com/disk-alarms.html)\n* [Disk Space](https://www.rabbitmq.com/production-checklist.html#resource-limits-disk-space)\n* [Persistence Configuration](https://www.rabbitmq.com/persistence-conf.html)\n* [Blocked Connection Notifications](https://www.rabbitmq.com/connection-blocked.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line+area" + } + }, + "decimals": 1, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", 
+ "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 1073741824 + }, + { + "color": "transparent", + "value": 5368709120 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + 
"properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 11, + "y": 16 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "asc" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "exemplar": true, + "expr": "rabbitmq_disk_space_available_bytes{instance=~\"$instance\"} * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Disk space available before publishers blocked", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "When this value reaches zero, new connections will not be accepted and disk write operations may fail.\n\nClient libraries, peer nodes and CLI tools will not be able to connect when the node runs out of available file descriptors.\n\n* [Open File Handles Limit](https://www.rabbitmq.com/production-checklist.html#resource-limits-file-handle-limit)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + 
"lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line+area" + } + }, + "decimals": -1, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 500 + }, + { + "color": "transparent", + "value": 1000 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + 
{ + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 4, + "w": 5, + "x": 19, + "y": 16 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "asc" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "exemplar": true, + "expr": "(rabbitmq_process_max_fds{instance=~\"$instance\"} * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) -\n(rabbitmq_process_open_fds{instance=~\"$instance\"} * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"})", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "File descriptors available", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "When this value reaches zero, new connections will not be accepted.\n\nClient libraries, peer nodes and CLI tools will not be able to connect when the node runs out of available file descriptors.\n\n* [Networking and RabbitMQ](https://www.rabbitmq.com/networking.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line+area" + } + }, + "decimals": -1, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 500 + }, + { + "color": "transparent", + "value": 1000 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + 
"options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 4, + "w": 5, + "x": 19, + "y": 20 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "asc" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "exemplar": true, + "expr": "(rabbitmq_process_max_tcp_sockets{instance=~\"$instance\"} * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) -\n(rabbitmq_process_open_tcp_sockets{instance=~\"$instance\"} * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"})", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "TCP sockets available", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 80 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 11, + "x": 0, + "y": 24 + }, + "id": 71, + "options": { + "legend": { + "calcs": [ + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "rabbitmq_process_resident_memory_bytes{product=\"$product\",env=\"$env\"} / rabbitmq_resident_memory_limit_bytes{product=\"$product\",env=\"$env\"}", + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "RabbitMQ memory used", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 27, + "panels": [], + "title": "Queued Messages", + "type": "row" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "Total number of ready messages 
ready to be delivered to consumers.\n\nAim to keep this value as low as possible. RabbitMQ behaves best when messages are flowing through it. It's OK for publishers to occasionally outpace consumers, but the expectation is that consumers will eventually process all ready messages.\n\nIf this metric keeps increasing, your system will eventually run out of memory and/or disk space. Consider using TTL or Queue Length Limit to prevent unbounded message growth.\n\n* [Queues](https://www.rabbitmq.com/queues.html)\n* [Consumers](https://www.rabbitmq.com/consumers.html)\n* [Queue Length Limit](https://www.rabbitmq.com/maxlength.html)\n* [Time-To-Live and Expiration](https://www.rabbitmq.com/ttl.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + "id": 
"color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 33 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + 
}, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "exemplar": true, + "expr": "sum(rabbitmq_queue_messages_ready{instance=~\"$instance\"} * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) by(instance)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Messages ready to be delivered to consumers", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "The total number of messages that are either in-flight to consumers, currently being processed by consumers or simply waiting for the consumer acknowledgements to be processed by the queue. Until the queue processes the message acknowledgement, the message will remain unacknowledged.\n\n* [Queues](https://www.rabbitmq.com/queues.html)\n* [Confirms and Acknowledgements](https://www.rabbitmq.com/confirms.html)\n* [Consumer Prefetch](https://www.rabbitmq.com/consumer-prefetch.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": 
"red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": 
"byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 33 + }, + "id": 19, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "exemplar": true, + "expr": "sum(rabbitmq_queue_messages_unacked{instance=~\"$instance\"} * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) by(instance)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Messages pending consumer acknowledgement", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 11, + "panels": [], + "title": "Incoming Messages", + "type": "row" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "The incoming message rate before any routing rules are applied.\n\nIf this value is lower than the number of messages published to queues, it may indicate that some messages are delivered to more than one queue.\n\nIf this value is higher than the number of messages published to queues, messages cannot be routed and will either be dropped or returned to publishers.\n\n* [Publishers](https://www.rabbitmq.com/publishers.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": 
false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + { + "matcher": 
{ + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 39 + }, + "id": 13, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(rabbitmq_channel_messages_published_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) by(instance)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Messages published / s", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "The rate of messages confirmed by the broker to publishers. Publishers must opt-in to receive message confirmations.\n\nIf this metric is consistently at zero it may suggest that publisher confirms are not used by clients. 
The safety of published messages is likely to be at risk.\n\n* [Publisher Confirms](https://www.rabbitmq.com/confirms.html#publisher-confirms)\n* [Publisher Confirms and Data Safety](https://www.rabbitmq.com/publishers.html#data-safety)\n* [When Will Published Messages Be Confirmed by the Broker?](https://www.rabbitmq.com/confirms.html#when-publishes-are-confirmed)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", 
+ "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 39 + }, + "id": 18, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(rabbitmq_channel_messages_confirmed_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) by(instance)", + "format": "time_series", + "instant": false, + "interval": "", + 
"intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Messages confirmed to publishers / s", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "The rate of messages received from publishers and successfully routed to the master queue replicas.\n\n* [Queues](https://www.rabbitmq.com/queues.html)\n* [Publishers](https://www.rabbitmq.com/publishers.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": 
"/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 44 + }, + "id": 61, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(rabbitmq_queue_messages_published_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster,rabbitmq_node) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) 
by(instance)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Messages routed to queues / s", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "The rate of messages received from publishers that have publisher confirms enabled and the broker has not confirmed yet.\n\n* [Publishers](https://www.rabbitmq.com/publishers.html)\n* [Confirms and Acknowledgements](https://www.rabbitmq.com/confirms.html)\n* [When Will Published Messages Be Confirmed by the Broker?](https://www.rabbitmq.com/confirms.html#when-publishes-are-confirmed)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + 
"id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 44 + }, + "id": 12, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.3", + 
"targets": [ + { + "exemplar": true, + "expr": "sum(rate(rabbitmq_channel_messages_unconfirmed{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) by(instance)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Messages unconfirmed to publishers / s", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "The rate of messages that cannot be routed and are dropped. \n\nAny value above zero means message loss and likely suggests a routing problem on the publisher end.\n\n* [Unroutable Message Handling](https://www.rabbitmq.com/publishers.html#unroutable)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "dashed" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent" + }, + { + "color": "red", + "value": 0 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/rabbit/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C4162A", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 49 + }, + "id": 34, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + 
], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(rabbitmq_channel_messages_unroutable_dropped_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) by(instance)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Unroutable messages dropped / s", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "The rate of messages that cannot be routed and are returned back to publishers.\n\nSustained values above zero may indicate a routing problem on the publisher end.\n\n* [Unroutable Message Handling](https://www.rabbitmq.com/publishers.html#unroutable)\n* [When Will Published Messages Be Confirmed by the Broker?](https://www.rabbitmq.com/confirms.html#when-publishes-are-confirmed)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "dashed" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent" + }, + { + "color": "red", + "value": 0 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": 
"byRegexp", + "options": "/rabbit/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C4162A", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 49 + }, + "id": 16, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(rabbitmq_channel_messages_unroutable_returned_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) by(instance)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Unroutable messages returned to publishers / s", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 54 + }, + "id": 29, + "panels": [], + "title": "Outgoing Messages", + "type": "row" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "The rate of messages delivered to consumers. 
It includes messages that have been redelivered.\n\nThis metric does not include messages that have been fetched by consumers using `basic.get` (consumed by polling).\n\n* [Consumers](https://www.rabbitmq.com/consumers.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": 
"/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 55 + }, + "id": 14, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(\n (rate(rabbitmq_channel_messages_delivered_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) +\n (rate(rabbitmq_channel_messages_delivered_ack_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"})\n) by(instance)", + 
"format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Messages delivered / s", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "The rate of messages that have been redelivered to consumers. It includes messages that have been requeued automatically and redelivered due to channel exceptions or connection closures.\n\nHaving some redeliveries is expected, but if this metric is consistently non-zero, it is worth investigating why.\n\n* [Negative Acknowledgement and Requeuing of Deliveries](https://www.rabbitmq.com/confirms.html#consumer-nacks-requeue)\n* [Consumers](https://www.rabbitmq.com/consumers.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "dashed" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "transparent" + }, + { + "color": "orange", + "value": 20 + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + 
"id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 55 + }, + "id": 15, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": 
false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(rabbitmq_channel_messages_redelivered_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) by(instance)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Messages redelivered / s", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "The rate of message deliveries to consumers that use manual acknowledgement mode.\n\nWhen this mode is used, RabbitMQ waits for consumers to acknowledge messages before more messages can be delivered.\n\nThis is the safest way of consuming messages.\n\n* [Consumer Acknowledgements](https://www.rabbitmq.com/confirms.html)\n* [Consumer Prefetch](https://www.rabbitmq.com/consumer-prefetch.html)\n* [Consumer Acknowledgement Modes, Prefetch and Throughput](https://www.rabbitmq.com/confirms.html#channel-qos-prefetch-throughput)\n* [Consumers](https://www.rabbitmq.com/consumers.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + 
}, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": 
{ + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 60 + }, + "id": 20, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(rabbitmq_channel_messages_delivered_ack_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) by(instance)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Messages delivered with manual ack / s", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "The rate of message deliveries to consumers that use automatic acknowledgement mode.\n\nWhen this mode is used, RabbitMQ does not wait for consumers to acknowledge message deliveries.\n\nThis mode is fire-and-forget and does not offer any delivery safety guarantees. 
It tends to provide higher throughput and it may lead to consumer overload and higher consumer memory usage.\n\n* [Consumer Acknowledgement Modes, Prefetch and Throughput](https://www.rabbitmq.com/confirms.html#channel-qos-prefetch-throughput)\n* [Consumers](https://www.rabbitmq.com/consumers.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + 
] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 60 + }, + "id": 21, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(rabbitmq_channel_messages_delivered_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) by(instance)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + 
"title": "Messages delivered auto ack / s", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "The rate of message acknowledgements coming from consumers that use manual acknowledgement mode.\n\n* [Consumer Acknowledgements](https://www.rabbitmq.com/confirms.html)\n* [Consumer Prefetch](https://www.rabbitmq.com/consumer-prefetch.html)\n* [Consumer Acknowledgement Modes, Prefetch and Throughput](https://www.rabbitmq.com/confirms.html#channel-qos-prefetch-throughput)\n* [Consumers](https://www.rabbitmq.com/consumers.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + 
"value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 65 + }, + "id": 22, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(rabbitmq_channel_messages_acked_total{instance=~\"$instance\"}[60s]) 
* on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) by(instance)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Messages acknowledged / s", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "The rate of messages delivered to polling consumers that use automatic acknowledgement mode.\n\nThe use of polling consumers is highly inefficient and therefore strongly discouraged.\n\n* [Fetching individual messages](https://www.rabbitmq.com/consumers.html#fetching)\n* [Consumers](https://www.rabbitmq.com/consumers.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "dashed" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent" + }, + { + "color": "red", + "value": 0 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/rabbit/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C4162A", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 65 + }, + "id": 24, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + 
"showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(rabbitmq_channel_get_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) by(instance)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Polling operations with auto ack / s", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "The rate of polling consumer operations that yield no result.\n\nAny value above zero means that RabbitMQ resources are wasted by polling consumers.\n\nCompare this metric to the other polling consumer metrics to see the inefficiency rate.\n\nThe use of polling consumers is highly inefficient and therefore strongly discouraged.\n\n* [Fetching individual messages](https://www.rabbitmq.com/consumers.html#fetching)\n* [Consumers](https://www.rabbitmq.com/consumers.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "dashed" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent" + }, + { + "color": "red", + "value": 0 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { 
+ "id": "byRegexp", + "options": "/rabbit/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C4162A", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 70 + }, + "id": 25, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(rabbitmq_channel_get_empty_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) by(instance)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Polling operations that yield no result / s", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "The rate of messages delivered to polling consumers that use manual acknowledgement mode.\n\nThe use of polling consumers is highly inefficient and therefore strongly discouraged.\n\n* [Fetching individual messages](https://www.rabbitmq.com/consumers.html#fetching)\n* [Consumers](https://www.rabbitmq.com/consumers.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "dashed" + } + }, 
+ "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent" + }, + { + "color": "red", + "value": 0 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/rabbit/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C4162A", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 70 + }, + "id": 23, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(rabbitmq_channel_get_ack_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) by(instance)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Polling operations with manual ack / s", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 75 + }, + "id": 53, + "panels": [], + "title": "Queues", + "type": "row" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "Total number of queue masters per node. 
\n\nThis metric makes it easy to see sub-optimal queue distribution in a cluster.\n\n* [Queue Masters, Data Locality](https://www.rabbitmq.com/ha.html#master-migration-data-locality)\n* [Queues](https://www.rabbitmq.com/queues.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": -1, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": 
"/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 76 + }, + "id": 57, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "rabbitmq_queues{instance=~\"$instance\"} * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Total queues", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${datasource}" + }, + 
"description": "The rate of queue declarations performed by clients.\n\nLow sustained values above zero are to be expected. High rates may be indicative of queue churn or high rates of connection recovery. Confirm connection recovery rates by using the _Connections opened_ metric.\n\n* [Queues](https://www.rabbitmq.com/queues.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "line+area" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent" + }, + { + "color": "orange", + "value": 2 + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": 
"color", + "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 76 + }, + "id": 58, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(rabbitmq_queues_declared_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) by(instance)", + "format": "time_series", + "instant": false, + "interval": "", + 
"intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Queues declared / s", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "The rate of new queues created (as opposed to redeclarations).\n\nLow sustained values above zero are to be expected. High rates may be indicative of queue churn or high rates of connection recovery. Confirm connection recovery rates by using the _Connections opened_ metric.\n\n* [Queues](https://www.rabbitmq.com/queues.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "line+area" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent" + }, + { + "color": "orange", + "value": 2 + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", 
+ "value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 76 + }, + "id": 60, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(rabbitmq_queues_created_total{instance=~\"$instance\"}[60s]) * 
on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) by(instance)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Queues created / s", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "The rate of queues deleted.\n\nLow sustained values above zero are to be expected. High rates may be indicative of queue churn or high rates of connection recovery. Confirm connection recovery rates by using the _Connections opened_ metric.\n\n* [Queues](https://www.rabbitmq.com/queues.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "dashed" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent" + }, + { + "color": "orange", + "value": 2 + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + 
} + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 76 + }, + "id": 59, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + 
"pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(rabbitmq_queues_deleted_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) by(instance)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Queues deleted / s", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 81 + }, + "id": 51, + "panels": [], + "title": "Channels", + "type": "row" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "Total number of channels on all currently opened connections.\n\nIf this metric grows monotonically it is highly likely a channel leak in one of the applications. Confirm channel leaks by using the _Channels opened_ and _Channels closed_ metrics.\n\n* [Channel Leak](https://www.rabbitmq.com/channels.html#channel-leaks)\n* [Channels](https://www.rabbitmq.com/channels.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": -1, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + 
"options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + 
"fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 82 + }, + "id": 54, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "rabbitmq_channels{instance=~\"$instance\"} * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Total channels", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "The rate of new channels opened by applications across all connections. Channels are expected to be long-lived.\n\nLow sustained values above zero are to be expected. High rates may be indicative of channel churn or mass connection recovery. 
Confirm connection recovery rates by using the _Connections opened_ metric.\n\n* [High Channel Churn](https://www.rabbitmq.com/channels.html#high-channel-churn)\n* [Channels](https://www.rabbitmq.com/channels.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "dashed" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent" + }, + { + "color": "orange", + "value": 2 + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + 
"options": "/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 82 + }, + "id": 55, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(rabbitmq_channels_opened_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) by(instance)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Channels opened / s", + "type": "timeseries" + }, 
+ { + "datasource": { + "uid": "${datasource}" + }, + "description": "The rate of channels closed by applications across all connections. Channels are expected to be long-lived.\n\nLow sustained values above zero are to be expected. High rates may be indicative of channel churn or mass connection recovery. Confirm connection recovery rates by using the _Connections opened_ metric.\n\n* [High Channel Churn](https://www.rabbitmq.com/channels.html#high-channel-churn)\n* [Channels](https://www.rabbitmq.com/channels.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "dashed" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent" + }, + { + "color": "orange", + "value": 2 + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ 
+ { + "id": "color", + "value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 82 + }, + "id": 56, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": 
"sum(rate(rabbitmq_channels_closed_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) by(instance)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Channels closed / s", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 87 + }, + "id": 46, + "panels": [], + "title": "Connections", + "type": "row" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "Total number of client connections.\n\nIf this metric grows monotonically it is highly likely a connection leak in one of the applications. Confirm connection leaks by using the _Connections opened_ and _Connections closed_ metrics.\n\n* [Connection Leak](https://www.rabbitmq.com/connections.html#monitoring)\n* [Connections](https://www.rabbitmq.com/connections.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": -1, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": 
"color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + 
"h": 5, + "w": 12, + "x": 0, + "y": 88 + }, + "id": 47, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "rabbitmq_connections{instance=~\"$instance\"} * on(instance) group_left(rabbitmq_cluster,rabbitmq_node) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Total connections", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "The rate of new connections opened by clients. Connections are expected to be long-lived.\n\nLow sustained values above zero are to be expected. High rates may be indicative of connection churn or mass connection recovery.\n\n* [Connection Leak](https://www.rabbitmq.com/connections.html#monitoring)\n* [Connections](https://www.rabbitmq.com/connections.html)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "line+area" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent" + }, + { + "color": "orange", + "value": 2 + }, + { + "color": "red", + 
"value": 10 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?0(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?1(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?2(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#3274D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?3(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A352CC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?4(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF780A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?5(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#96D98D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?6(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFEE52", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?7(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#8AB8FF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^rabbit@[a-zA-Z\\.\\-]*?8(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CA95E5", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + 
"options": "/^rabbit@[a-zA-Z\\.\\-]*?9(\\b|\\.)/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFB357", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 88 + }, + "id": 48, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(rabbitmq_connections_opened_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) by(instance)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Connections opened / s", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${datasource}" + }, + "description": "The rate of connections closed. Connections are expected to be long-lived.\n\nLow sustained values above zero are to be expected. 
High rates may be indicative of connection churn or mass connection recovery.\n\n* [Connections](https://www.rabbitmq.com/connections.html)", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 88 + }, + "id": 49, + "options": { + "alertThreshold": true + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(rabbitmq_connections_closed_total{instance=~\"$instance\"}[60s]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"}) by(instance) ", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Connections closed / s", + "type": "timeseries" + } + ], + "preload": false, + "refresh": "", + "schemaVersion": 40, + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus (Local)", + "value": "localprometheus" + }, + "includeAll": false, + "label": "Prometheus", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(rabbitmq_identity_info{},rabbitmq_cluster)", + "includeAll": true, + "label": "Cluster", + "name": "rabbitmq_cluster", + "options": [], + "query": { + "query": "label_values(rabbitmq_identity_info{},rabbitmq_cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"},instance)", + "includeAll": true, + "label": "Instance", + "name": "instance", + 
"options": [], + "query": { + "query": "label_values(rabbitmq_identity_info{rabbitmq_cluster=~\"$rabbitmq_cluster\"},instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "15s", + "30s", + "1m", + "5m", + "10m" + ] + }, + "timezone": "utc", + "title": "RabbitMQ Monitoring", + "uid": "Kn5xm-gZ5", + "version": 2, + "weekStart": "" +} \ No newline at end of file diff --git a/charts/infrahub-observability/templates/NOTES.txt b/charts/infrahub-observability/templates/NOTES.txt new file mode 100644 index 0000000..0a0367d --- /dev/null +++ b/charts/infrahub-observability/templates/NOTES.txt @@ -0,0 +1,38 @@ +{{ .Chart.Name }} {{ .Chart.Version }} has been installed as release "{{ .Release.Name }}" in namespace "{{ .Release.Namespace }}". + +The stack includes: Grafana Alloy, Loki, Tempo, Prometheus, Grafana, and the Prefect prometheus exporter. + +1. Access Grafana + + By default the Grafana Service is ClusterIP-only. Port-forward to reach the UI: + + kubectl --namespace {{ .Release.Namespace }} port-forward svc/{{ .Release.Name }}-grafana 3000:80 + + Then open http://localhost:3000. Look up the admin password with: + + kubectl --namespace {{ .Release.Namespace }} get secret {{ .Release.Name }}-grafana \ + -o jsonpath="{.data.admin-password}" | base64 -d ; echo + +2. Send traces from infrahub to this stack + + The infrahub chart exposes a `global.tracing` block. Point it at this Tempo: + + helm upgrade {{ .Values.global.infrahubReleaseName }} ./charts/infrahub \ + --namespace {{ include "infrahub-observability.infrahubNamespace" . }} \ + --reuse-values \ + --set global.tracing.enabled=true \ + --set global.tracing.endpoint={{ include "infrahub-observability.tempoOtlpGrpcEndpoint" . }} \ + --set global.tracing.protocol=grpc \ + --set global.tracing.insecure=true + +3. 
In-cluster endpoints (use these when wiring custom workloads) + + - OTLP gRPC (traces): {{ include "infrahub-observability.tempoOtlpGrpcEndpoint" . }} + - Prometheus remote-write: {{ include "infrahub-observability.prometheusRemoteWriteUrl" . }} + - Loki push API: {{ include "infrahub-observability.lokiPushUrl" . }} + +{{- if not .Values.prefectExporter.enabled }} + +NOTE: The Prefect exporter is disabled (.Values.prefectExporter.enabled=false). Prefect flow-run metrics +will not be collected. Set prefectExporter.enabled=true to deploy it. +{{- end }} diff --git a/charts/infrahub-observability/templates/_helpers.tpl b/charts/infrahub-observability/templates/_helpers.tpl new file mode 100644 index 0000000..f1fb2c1 --- /dev/null +++ b/charts/infrahub-observability/templates/_helpers.tpl @@ -0,0 +1,134 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "infrahub-observability.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "infrahub-observability.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. 
+*/}} +{{- define "infrahub-observability.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "infrahub-observability.labels" -}} +helm.sh/chart: {{ include "infrahub-observability.chart" . }} +{{ include "infrahub-observability.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- if gt (len .Values.global.commonLabels) 0 }} +{{ .Values.global.commonLabels | toYaml }} +{{- end }} +{{- end }} + +{{/* +Common annotations +*/}} +{{- define "infrahub-observability.annotations" -}} +{{- if gt (len .Values.global.commonAnnotations) 0 -}} +{{ .Values.global.commonAnnotations | toYaml }} +{{- end }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "infrahub-observability.selectorLabels" -}} +app.kubernetes.io/name: {{ include "infrahub-observability.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- if gt (len .Values.global.podLabels) 0 }} +{{ .Values.global.podLabels | toYaml }} +{{- end }} +{{- end }} + +{{/* +Namespace where the sibling infrahub release lives. Defaults to the release namespace. +*/}} +{{- define "infrahub-observability.infrahubNamespace" -}} +{{- default .Release.Namespace .Values.global.infrahubNamespace -}} +{{- end }} + +{{/* +Service URL helpers — derived from the release name and the conventions of each +upstream sub-chart. Sub-chart service names follow `<release>-<name>` for grafana, +loki, tempo, alloy, node-exporter; `<release>-prometheus-server` for prometheus. +*/}} +{{- define "infrahub-observability.prometheusUrl" -}} +http://{{ .Release.Name }}-prometheus-server +{{- end }} + +{{- define "infrahub-observability.prometheusRemoteWriteUrl" -}} +{{ include "infrahub-observability.prometheusUrl" . 
}}/api/v1/write +{{- end }} + +{{- define "infrahub-observability.lokiUrl" -}} +http://{{ .Release.Name }}-loki:3100 +{{- end }} + +{{- define "infrahub-observability.lokiPushUrl" -}} +{{ include "infrahub-observability.lokiUrl" . }}/loki/api/v1/push +{{- end }} + +{{- define "infrahub-observability.tempoUrl" -}} +http://{{ .Release.Name }}-tempo:3100 +{{- end }} + +{{- define "infrahub-observability.tempoOtlpGrpcEndpoint" -}} +{{ .Release.Name }}-tempo:4317 +{{- end }} + +{{/* +ConfigMap that holds Alloy's config.alloy. Must match what the Alloy subchart +resolves to when `alloy.alloy.configMap.name` is empty (see Alloy chart +templates/_config.tpl → `alloy.fullname`). Mirroring its logic here lets us +leave `configMap.name` unset in values.yaml. +*/}} +{{- define "infrahub-observability.alloyConfigMapName" -}} +{{- if contains "alloy" .Release.Name -}} +{{ .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else -}} +{{ printf "%s-alloy" .Release.Name | trunc 63 | trimSuffix "-" }} +{{- end -}} +{{- end }} + +{{- define "infrahub-observability.prefectExporterFullname" -}} +{{ include "infrahub-observability.fullname" . }}-prefect-exporter +{{- end }} + +{{/* +Default Prefect API URL. The infrahub chart embeds prefect-server as a +subchart whose Service is fixed-named "prefect-server" (not release-prefixed) +and the infrahub chart hard-codes PREFECT_API_URL accordingly. Users can +override via .Values.prefectExporter.prefectApiUrl. 
+*/}} +{{- define "infrahub-observability.prefectApiUrl" -}} +{{- if .Values.prefectExporter.prefectApiUrl -}} +{{ .Values.prefectExporter.prefectApiUrl }} +{{- else -}} +http://prefect-server.{{ include "infrahub-observability.infrahubNamespace" . }}:4200/api +{{- end -}} +{{- end }} diff --git a/charts/infrahub-observability/templates/alloy-config.yaml b/charts/infrahub-observability/templates/alloy-config.yaml new file mode 100644 index 0000000..a852579 --- /dev/null +++ b/charts/infrahub-observability/templates/alloy-config.yaml @@ -0,0 +1,378 @@ +{{- if .Values.alloy.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "infrahub-observability.alloyConfigMapName" . }} + namespace: {{ .Release.Namespace | quote }} + labels: + service: alloy + {{- include "infrahub-observability.labels" . | nindent 4 }} + {{- with (include "infrahub-observability.annotations" .) }} + annotations: + {{- . | nindent 4 }} + {{- end }} +data: + config.alloy: | + // ======================================================================== + // Grafana Alloy configuration for the infrahub-observability chart. + // Adapted from opsmill/infrahub@infrahub-v1.9.3 development/alloy/config.alloy + // for Kubernetes: Docker discovery replaced with kubernetes discovery, and + // Docker hostnames replaced with chart-rendered Kubernetes Service names. + // ======================================================================== + + logging { + level = "warn" + } + + // ------------------------------------------------------------------------ + // Log Pipeline + // ------------------------------------------------------------------------ + + loki.write "loki" { + endpoint { + url = "{{ include "infrahub-observability.lokiPushUrl" . }}" + } + } + + discovery.kubernetes "pods" { + role = "pod" + namespaces { + names = ["{{ include "infrahub-observability.infrahubNamespace" . 
}}"] + } + } + + discovery.relabel "pods" { + targets = discovery.kubernetes.pods.targets + + rule { + source_labels = ["__meta_kubernetes_pod_name"] + target_label = "pod" + } + rule { + source_labels = ["__meta_kubernetes_namespace"] + target_label = "namespace" + } + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + target_label = "container" + } + // Infrahub chart pods identify their role through one of two labels: + // - Top-level workloads (infrahub-server, infrahub-task-worker) carry + // `service: <name>` matching their fullname (e.g. infrahub-server). + // - Subchart workloads (cache, database, message-queue, task-manager-db) + // carry only `infrahub/service: <name>` (e.g. database). + // Prefer `service:` when present so the parsing stages below keep + // matching the existing component values; fall back to `infrahub/service:` + // for subchart pods so their logs still get a `component` label. + rule { + source_labels = ["__meta_kubernetes_pod_label_service"] + target_label = "component" + } + rule { + source_labels = ["component", "__meta_kubernetes_pod_label_infrahub_service"] + separator = ";" + regex = ";(.+)" + replacement = "$1" + target_label = "component" + } + } + + loki.source.kubernetes "pods" { + targets = discovery.relabel.pods.output + forward_to = [loki.process.pipeline.receiver] + } + + loki.process "pipeline" { + forward_to = [loki.write.loki.receiver] + + stage.decolorize {} + + // --- Database (Neo4j) --- + stage.match { + selector = `{component="database"}` + + stage.regex { + expression = `(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+[+-]\d{4})\s+(?P<level>\w+)\s+(?P<message>.*)` + } + stage.labels { + values = { + level = "", + timestamp = "", + } + } + } + + // --- Cache (Redis) --- + stage.match { + selector = `{component="cache"}` + + stage.regex { + expression = `(?P<timestamp>\d{2}\s+\w{3}\s+\d{4}\s+\d{2}:\d{2}:\d{2}\.\d+)\s+(?P<level>[.#*-])\s+(?P<message>.*)` + } + stage.template { + source = "level" + template = `{{ "{{" }} if eq .Value "." 
{{ "}}" }}DEBUG{{ "{{" }} else if eq .Value "-" {{ "}}" }}VERBOSE{{ "{{" }} else if eq .Value "*" {{ "}}" }}NOTICE{{ "{{" }} else if eq .Value "#" {{ "}}" }}WARNING{{ "{{" }} end {{ "}}" }}` + } + stage.labels { + values = { + level = "", + timestamp = "", + message = "", + } + } + } + + // --- Infrahub server (non-JSON mode) --- + stage.match { + selector = `{component="infrahub-server"}` + + stage.regex { + expression = `(?P<timestamp>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z)\s+\[(?P<level>\w+)\s*\]\s+(?P<message>.*?)\s+\[(?P<logger>[^\]]+)\](?P<additional_info>.*)` + } + stage.template { + source = "message" + template = `{{ "{{" }} .message {{ "}}" }} {{ "{{" }} .additional_info {{ "}}" }}` + } + stage.labels { + values = { + level = "", + timestamp = "", + logger = "", + } + } + } + + // --- Task worker --- + stage.match { + selector = `{component="infrahub-task-worker"}` + + stage.regex { + expression = `(?P<timestamp>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z)\s+\[(?P<level>\w+)\s*\]\s+(?P<message>.*?)\s+\[(?P<logger>[^\]]+)\]` + } + stage.labels { + values = { + level = "", + timestamp = "", + logger = "", + } + } + } + + // --- Message queue (RabbitMQ) --- + stage.match { + selector = `{component="message-queue"}` + + stage.regex { + expression = `(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+\+\d{2}:\d{2})\s+\[(?P<level>\w+)\]\s+(?P<pid>[^>]+>)\s+(?P<message>.*)` + } + stage.labels { + values = { + level = "", + timestamp = "", + } + } + } + + // Parse logfmt fields from the raw log line + stage.logfmt { + mapping = { + app = "", + request_id = "", + branch = "", + trace_id = "", + } + } + + // Normalize level to lowercase + stage.template { + source = "level" + template = `{{ "{{" }} ToLower .Value {{ "}}" }}` + } + + stage.static_labels { + values = { + job = "infrahub-alloy", + } + } + } + + // ------------------------------------------------------------------------ + // Metrics Pipeline + // ------------------------------------------------------------------------ + + prometheus.remote_write "prometheus" { + endpoint { + url = "{{ include 
"infrahub-observability.prometheusRemoteWriteUrl" . }}" + } + } + + // --- Prometheus self-monitoring --- + prometheus.scrape "prometheus" { + targets = [ + {"__address__" = "{{ .Release.Name }}-prometheus-server:80"}, + ] + forward_to = [prometheus.remote_write.prometheus.receiver] + job_name = "prometheus" + scrape_interval = "15s" + } + + // --- Infrahub server (namespace-qualified: infrahub may live outside the release namespace) --- + prometheus.scrape "infrahub_server" { + targets = [ + {"__address__" = "{{ .Values.global.infrahubReleaseName }}-infrahub-server.{{ include "infrahub-observability.infrahubNamespace" . }}:8000"}, + ] + forward_to = [prometheus.remote_write.prometheus.receiver] + job_name = "infrahub-server" + scrape_interval = "15s" + } + + // --- Task worker (no Service in the infrahub chart — discover via pod label) --- + discovery.kubernetes "infrahub_worker" { + role = "pod" + namespaces { + names = ["{{ include "infrahub-observability.infrahubNamespace" . }}"] + } + } + + discovery.relabel "infrahub_worker" { + targets = discovery.kubernetes.infrahub_worker.targets + + rule { + action = "keep" + source_labels = ["__meta_kubernetes_pod_label_service"] + regex = "infrahub-task-worker" + } + rule { + source_labels = ["__address__"] + regex = "(.+?)(?::\\d+)?" + replacement = "$1:8000" + target_label = "__address__" + } + } + + prometheus.scrape "infrahub_worker" { + targets = discovery.relabel.infrahub_worker.output + forward_to = [prometheus.remote_write.prometheus.receiver] + job_name = "infrahub-worker" + scrape_interval = "15s" + } + + // --- Message queue (RabbitMQ prometheus exporter) --- + // The Bitnami rabbitmq chart exposes prometheus metrics on port 9419, + // not 15692 (the upstream rabbit prom plugin default). Adjust here if + // you override rabbit's metrics port. 
+ prometheus.scrape "message_queue" { + targets = [ + {"__address__" = "{{ .Values.global.infrahubReleaseName }}-message-queue.{{ include "infrahub-observability.infrahubNamespace" . }}:9419"}, + ] + forward_to = [prometheus.remote_write.prometheus.receiver] + job_name = "message-queue" + scrape_interval = "15s" + } + + // --- Database (Neo4j) --- + // The Neo4j chart does not expose prometheus metrics by default. To + // enable, set `neo4j.config.metrics.prometheus.enabled: "true"` on the + // infrahub chart, expose port 2004, and uncomment this block. + // + // prometheus.scrape "database" { + // targets = [ + // {"__address__" = "{{ .Values.global.infrahubReleaseName }}-database.{{ include "infrahub-observability.infrahubNamespace" . }}:2004"}, + // ] + // forward_to = [prometheus.remote_write.prometheus.receiver] + // job_name = "database" + // scrape_interval = "15s" + // } + + // --- Task manager exporter (Prefect API poller, shipped in this chart) --- + prometheus.scrape "task_manager_exporter" { + targets = [ + {"__address__" = "{{ include "infrahub-observability.prefectExporterFullname" . }}:{{ .Values.prefectExporter.service.port }}"}, + ] + honor_timestamps = false + forward_to = [prometheus.remote_write.prometheus.receiver] + job_name = "task-manager-exporter" + scrape_interval = "15s" + } + + // --- Logs infrastructure (Loki + Alloy self-monitoring; Alloy name follows the subchart fullname) --- + prometheus.scrape "logs" { + targets = [ + {"__address__" = "{{ .Release.Name }}-loki:3100"}, + {"__address__" = "{{ include "infrahub-observability.alloyConfigMapName" . }}:12345"}, + ] + forward_to = [prometheus.remote_write.prometheus.receiver] + job_name = "logs" + scrape_interval = "15s" + } + + // --- Node exporter --- + prometheus.scrape "node_exporter" { + targets = [ + {"__address__" = "{{ .Release.Name }}-prometheus-node-exporter:9100"}, + ] + forward_to = [prometheus.remote_write.prometheus.receiver] + job_name = "node-exporter" + scrape_interval = "15s" + } + + {{- if .Values.alloy.cadvisor.enabled }} + // --- cAdvisor (kubelet) --- + // Per-container CPU/memory/network/filesystem metrics. 
Scraped via the + // API server's node-proxy endpoint so we don't need direct kubelet + // access. The Alloy subchart's ClusterRole already grants get on + // nodes/proxy — see `kubectl get clusterrole -alloy`. + discovery.kubernetes "nodes" { + role = "node" + } + + discovery.relabel "kubelet_cadvisor" { + targets = discovery.kubernetes.nodes.targets + + rule { + action = "replace" + target_label = "__address__" + replacement = "kubernetes.default.svc:443" + } + rule { + source_labels = ["__meta_kubernetes_node_name"] + regex = "(.+)" + replacement = "/api/v1/nodes/${1}/proxy/metrics/cadvisor" + target_label = "__metrics_path__" + } + rule { + source_labels = ["__meta_kubernetes_node_name"] + target_label = "node" + } + } + + prometheus.scrape "cadvisor" { + targets = discovery.relabel.kubelet_cadvisor.output + forward_to = [prometheus.remote_write.prometheus.receiver] + scheme = "https" + job_name = "cadvisor" + scrape_interval = "30s" + + tls_config { + ca_file = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + insecure_skip_verify = false + server_name = "kubernetes.default.svc" + } + bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token" + } + {{- end }} + + // --- Task manager (Prefect server) --- + // The infrahub chart embeds prefect-server as a subchart whose Service + // is fixed-named "prefect-server" (no release prefix); the infrahub chart + // hard-codes PREFECT_API_URL accordingly. 
+ prometheus.scrape "prefect" { + targets = [ + {"__address__" = "prefect-server.{{ include "infrahub-observability.infrahubNamespace" . }}:4200"}, + ] + forward_to = [prometheus.remote_write.prometheus.receiver] + job_name = "task-manager" + metrics_path = "/api/metrics" + scrape_interval = "15s" + } +{{- end }} diff --git a/charts/infrahub-observability/templates/grafana-dashboards.yaml b/charts/infrahub-observability/templates/grafana-dashboards.yaml new file mode 100644 index 0000000..ed2063c --- /dev/null +++ b/charts/infrahub-observability/templates/grafana-dashboards.yaml @@ -0,0 +1,29 @@ +{{- /* +One ConfigMap per dashboard JSON. Bundling all dashboards into a single +ConfigMap would exceed etcd's 1 MiB-per-object limit — the vendored set +already totals ~900 KiB and loki_monitoring.json alone is ~390 KiB. +*/ -}} +{{- if .Values.grafana.enabled }} +{{- $root := . }} +{{- range $path, $_ := .Files.Glob "dashboards/*.json" }} +{{- $file := base $path }} +{{- $name := $file | trimSuffix ".json" | replace "_" "-" }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "infrahub-observability.fullname" $root }}-dashboard-{{ $name }} + namespace: {{ $root.Release.Namespace | quote }} + labels: + grafana_dashboard: "1" + service: grafana + {{- include "infrahub-observability.labels" $root | nindent 4 }} + {{- with (include "infrahub-observability.annotations" $root) }} + annotations: + {{- . | nindent 4 }} + {{- end }} +data: + {{ $file }}: |- + {{- $root.Files.Get $path | nindent 4 }} +{{- end }} +{{- end }} diff --git a/charts/infrahub-observability/templates/grafana-datasources.yaml b/charts/infrahub-observability/templates/grafana-datasources.yaml new file mode 100644 index 0000000..43e7777 --- /dev/null +++ b/charts/infrahub-observability/templates/grafana-datasources.yaml @@ -0,0 +1,80 @@ +{{- if .Values.grafana.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "infrahub-observability.fullname" . 
}}-grafana-datasources + namespace: {{ .Release.Namespace | quote }} + labels: + grafana_datasource: "1" + service: grafana + {{- include "infrahub-observability.labels" . | nindent 4 }} + {{- with (include "infrahub-observability.annotations" .) }} + annotations: + {{- . | nindent 4 }} + {{- end }} +data: + datasources.yaml: | + apiVersion: 1 + datasources: + - name: Prometheus + type: prometheus + uid: localprometheus + access: proxy + orgId: 1 + url: {{ include "infrahub-observability.prometheusUrl" . }} + isDefault: true + basicAuth: false + editable: true + jsonData: + httpMethod: POST + timeInterval: 15s + + - name: Loki + type: loki + uid: localloki + access: proxy + url: {{ include "infrahub-observability.lokiUrl" . }} + editable: true + jsonData: + maxLines: 1000 + derivedFields: + - name: TraceID + matcherRegex: 'TraceID=0x(\w+)' + url: '$${__value.raw}' + urlDisplayLabel: "View Trace via TraceID (in Tempo)" + datasourceUid: 'localtempo' + - name: RequestID + matcherRegex: 'RequestID=(\w+)' + url: '{name=~".*$${__value.raw}.*"}' + urlDisplayLabel: "View Trace via RequestID (in Tempo)" + datasourceUid: 'localtempo' + + - name: Tempo + type: tempo + uid: localtempo + access: proxy + url: {{ include "infrahub-observability.tempoUrl" . 
}} + basicAuth: false + editable: true + jsonData: + serviceMap: + datasourceUid: 'localprometheus' + tracesToLogsV2: + datasourceUid: 'localloki' + spanStartTimeShift: '1h' + spanEndTimeShift: '-1h' + filterByTraceID: false + filterBySpanID: false + tracesToMetrics: + datasourceUid: 'localprometheus' + spanStartTimeShift: '1h' + spanEndTimeShift: '-1h' + nodeGraph: + enabled: true + lokiSearch: + datasourceUid: 'localloki' + traceQuery: + timeShiftEnabled: true + spanStartTimeShift: '1h' + spanEndTimeShift: '-1h' +{{- end }} diff --git a/charts/infrahub-observability/templates/prefect-exporter-deployment.yaml b/charts/infrahub-observability/templates/prefect-exporter-deployment.yaml new file mode 100644 index 0000000..f28fa1d --- /dev/null +++ b/charts/infrahub-observability/templates/prefect-exporter-deployment.yaml @@ -0,0 +1,64 @@ +{{- if .Values.prefectExporter.enabled }} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "infrahub-observability.prefectExporterFullname" . }} + namespace: {{ .Release.Namespace | quote }} + labels: + service: prefect-exporter + {{- include "infrahub-observability.labels" . | nindent 4 }} + annotations: + {{- include "infrahub-observability.annotations" . | nindent 4 }} +spec: + replicas: {{ .Values.prefectExporter.replicas }} + selector: + matchLabels: + service: prefect-exporter + {{- include "infrahub-observability.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + service: prefect-exporter + {{- include "infrahub-observability.selectorLabels" . | nindent 8 }} + {{- with .Values.prefectExporter.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.global.imagePullSecrets }} + imagePullSecrets: {{- toYaml . 
| nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.prefectExporter.securityContext | nindent 8 }} + containers: + - name: prefect-exporter + image: "{{ .Values.prefectExporter.image.repository }}:{{ .Values.prefectExporter.image.tag }}" + imagePullPolicy: {{ default .Values.global.imagePullPolicy .Values.prefectExporter.image.pullPolicy }} + env: + - name: PREFECT_API_URL + value: {{ include "infrahub-observability.prefectApiUrl" . | quote }} + - name: LOG_LEVEL + value: {{ .Values.prefectExporter.logLevel | quote }} + ports: + - name: metrics + containerPort: 8000 + protocol: TCP + readinessProbe: + httpGet: + path: /metrics + port: metrics + initialDelaySeconds: 10 + periodSeconds: 30 + resources: + {{- toYaml .Values.prefectExporter.resources | nindent 12 }} + {{- with .Values.prefectExporter.nodeSelector }} + nodeSelector: {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prefectExporter.tolerations }} + tolerations: {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prefectExporter.affinity }} + affinity: {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/charts/infrahub-observability/templates/prefect-exporter-service.yaml b/charts/infrahub-observability/templates/prefect-exporter-service.yaml new file mode 100644 index 0000000..d858fbb --- /dev/null +++ b/charts/infrahub-observability/templates/prefect-exporter-service.yaml @@ -0,0 +1,26 @@ +{{- if .Values.prefectExporter.enabled }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ include "infrahub-observability.prefectExporterFullname" . }} + namespace: {{ .Release.Namespace | quote }} + labels: + service: prefect-exporter + {{- include "infrahub-observability.labels" . | nindent 4 }} + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: {{ .Values.prefectExporter.service.port | quote }} + prometheus.io/path: "/metrics" + {{- include "infrahub-observability.annotations" . 
| nindent 4 }} +spec: + type: {{ .Values.prefectExporter.service.type }} + ports: + - name: metrics + port: {{ .Values.prefectExporter.service.port }} + targetPort: metrics + protocol: TCP + selector: + service: prefect-exporter + {{- include "infrahub-observability.selectorLabels" . | nindent 4 }} +{{- end }} diff --git a/charts/infrahub-observability/values.yaml b/charts/infrahub-observability/values.yaml new file mode 100644 index 0000000..b52ab61 --- /dev/null +++ b/charts/infrahub-observability/values.yaml @@ -0,0 +1,242 @@ +--- +# -- Default values for infrahub-observability. +# This is a YAML-formatted file. + +# -- Global values shared across all sub-charts and templates in this chart. +global: + # -- Cluster DNS domain. Used for fully-qualified service names if needed. + kubernetesClusterDomain: cluster.local + # -- Default imagePullPolicy for in-chart workloads (currently only the Prefect exporter). + imagePullPolicy: IfNotPresent + # -- Image pull secrets propagated to in-chart workloads. + imagePullSecrets: [] + # -- Labels added to every resource managed by this chart. + commonLabels: {} + # -- Annotations added to every resource managed by this chart. + commonAnnotations: {} + # -- Pod-level labels merged into the standard selector labels. + podLabels: {} + # -- Release name of the sibling infrahub chart. Used by the Prefect exporter + # to derive the default PREFECT_API_URL and by Alloy when scoping discovery. + infrahubReleaseName: infrahub + # -- Namespace where the sibling infrahub release lives. Empty string means + # the same namespace as this release. + infrahubNamespace: "" + +# ---------------------------------------------------------------------------- +# Alloy — unified collector (logs + metrics) +# +# We disable the upstream chart's auto-generated config and supply our own +# ConfigMap in templates/alloy-config.yaml. The DaemonSet controller is required +# so each node's Alloy can read pod logs from the local kubelet. 
+# ---------------------------------------------------------------------------- +alloy: + enabled: true + controller: + type: daemonset + alloy: + configMap: + create: false + # -- Name of the ConfigMap that holds Alloy's config.alloy file. + # Resolved at render time via the helper. + name: "" + key: config.alloy + mounts: + varlog: true + dockercontainers: false + clustering: + enabled: false + # -- Scrape kubelet cAdvisor for per-container CPU/memory/network/fs + # metrics. Requires the Alloy ServiceAccount to have `get nodes/proxy`, + # which the subchart's default RBAC already grants. Disable if your + # cluster's policy forbids that permission; the Container Resources + # and Neo4j Monitoring dashboards will then show no data. + cadvisor: + enabled: true + +# ---------------------------------------------------------------------------- +# Loki — log storage (single-binary mode for parity with docker-compose) +# ---------------------------------------------------------------------------- +loki: + enabled: true + deploymentMode: SingleBinary + loki: + auth_enabled: false + server: + log_level: warn + commonConfig: + replication_factor: 1 + storage: + type: filesystem + schemaConfig: + configs: + - from: "2024-04-01" + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: loki_index_ + period: 24h + limits_config: + reject_old_samples: true + reject_old_samples_max_age: 168h + allow_structured_metadata: true + max_global_streams_per_user: 15000 + ingestion_rate_mb: 32 + ingestion_burst_size_mb: 64 + max_entries_limit_per_query: 10000 + cardinality_limit: 100000 + per_stream_rate_limit: 3MB + per_stream_rate_limit_burst: 5MB + max_streams_per_user: 20000 + max_query_lookback: 24h + retention_period: 24h + compactor: + working_directory: /var/loki/compactor + compaction_interval: 10m + retention_enabled: true + retention_delete_delay: 2h + retention_delete_worker_count: 100 + delete_request_store: filesystem + singleBinary: + replicas: 1 + 
persistence: + enabled: true + size: 10Gi + read: + replicas: 0 + write: + replicas: 0 + backend: + replicas: 0 + chunksCache: + enabled: false + resultsCache: + enabled: false + test: + enabled: false + lokiCanary: + enabled: false + gateway: + enabled: false + +# ---------------------------------------------------------------------------- +# Tempo — distributed tracing (single-binary, OTLP receivers, metrics generator) +# ---------------------------------------------------------------------------- +tempo: + enabled: true + tempo: + receivers: + otlp: + protocols: + http: {} + grpc: + endpoint: 0.0.0.0:4317 + # metricsGenerator can extract RED metrics from spans and remote-write them + # into Prometheus. Disabled by default because enabling it requires a + # remoteWriteUrl that we cannot template from values.yaml. To enable, set: + # tempo.tempo.metricsGenerator.enabled: true + # tempo.tempo.metricsGenerator.remoteWriteUrl: "http://<release-name>-prometheus-server/api/v1/write" + metricsGenerator: + enabled: false + retention: 96h + persistence: + enabled: true + size: 10Gi + +# ---------------------------------------------------------------------------- +# Prometheus — TSDB with remote-write receiver (Alloy is the source of truth +# for scrapes; Prometheus does no scraping itself). +# ---------------------------------------------------------------------------- +prometheus: + enabled: true + alertmanager: + enabled: false + prometheus-pushgateway: + enabled: false + prometheus-node-exporter: + enabled: false # we use the standalone subchart at the top level + kube-state-metrics: + enabled: false + server: + # web.enable-lifecycle and storage.tsdb.retention.time are set by the + # subchart defaults; passing them via extraArgs would cause duplicate-flag + # errors. Use `server.retention` for retention and `extraArgs` only for + # flags the subchart doesn't already provide. 
+ retention: 96h + extraArgs: + web.enable-remote-write-receiver: "" + log.level: warn + persistentVolume: + enabled: true + size: 20Gi + serverFiles: + prometheus.yml: + scrape_configs: [] # Alloy pushes via remote_write; no Prometheus-side scrape + +# ---------------------------------------------------------------------------- +# Node exporter — host-level metrics +# ---------------------------------------------------------------------------- +prometheus-node-exporter: + enabled: true + +# ---------------------------------------------------------------------------- +# Grafana — visualization. Datasources and dashboards are provisioned via +# sidecar-watched ConfigMaps that this chart creates (see templates/). +# ---------------------------------------------------------------------------- +grafana: + enabled: true + adminUser: admin + # -- Default password matches docker-compose dev parity. Override via + # `grafana.admin.existingSecret` in production. + adminPassword: admin + service: + type: ClusterIP + ingress: + enabled: false + persistence: + enabled: true + size: 5Gi + sidecar: + dashboards: + enabled: true + label: grafana_dashboard + labelValue: "1" + searchNamespace: ALL + datasources: + enabled: true + label: grafana_datasource + labelValue: "1" + searchNamespace: ALL + env: + GF_USERS_ALLOW_SIGN_UP: "false" + GF_LOG_LEVEL: warn + +# ---------------------------------------------------------------------------- +# Prefect prometheus exporter — no upstream chart available, shipped as a +# small Deployment+Service in this chart. +# ---------------------------------------------------------------------------- +prefectExporter: + # -- Enable the Prefect prometheus exporter sidecar Deployment. + enabled: true + image: + repository: prefecthq/prometheus-prefect-exporter + tag: "3.3.0" + pullPolicy: "" # falls back to global.imagePullPolicy + # -- PREFECT_API_URL. 
Empty string defaults to the task-manager service of + # the sibling infrahub release (see _helpers.tpl). + prefectApiUrl: "" + # -- Log level passed to the exporter. + logLevel: WARNING + replicas: 1 + service: + type: ClusterIP + port: 8000 + podAnnotations: {} + resources: {} + nodeSelector: {} + tolerations: [] + affinity: {} + securityContext: + runAsNonRoot: true + runAsUser: 1000 diff --git a/charts/infrahub/Chart.yaml b/charts/infrahub/Chart.yaml index bc694d1..a169f1f 100644 --- a/charts/infrahub/Chart.yaml +++ b/charts/infrahub/Chart.yaml @@ -54,3 +54,7 @@ dependencies: version: "1.1.0" repository: "oci://registry.opsmill.io/opsmill/chart" condition: infrahub-backup.enabled + - name: infrahub-observability + version: "0.1.0" + repository: "oci://registry.opsmill.io/opsmill/chart" + condition: infrahub-observability.enabled diff --git a/charts/infrahub/README.md b/charts/infrahub/README.md index 913795b..4cd03f9 100644 --- a/charts/infrahub/README.md +++ b/charts/infrahub/README.md @@ -92,6 +92,11 @@ The chart offers the ability to configure persistence for the database and other | global.infrahubRepository | string | `"opsmill/infrahub"` | Repository for Infrahub images | | global.kubernetesClusterDomain | string | `"cluster.local"` | Kubernetes cluster domain | | global.podLabels | object | `{}` | Labels to use for all configured pods | +| global.tracing | object | `{"enabled":false,"endpoint":"","insecure":true,"protocol":"grpc"}` | Send traces to an OTLP collector. When enabled, INFRAHUB_TRACE_* env vars are injected into the server and task-worker Deployments. Pair with the infrahub-observability chart (Tempo endpoint: -tempo:4317) or any other OTLP-compatible collector. | +| global.tracing.enabled | bool | `false` | Enable tracing instrumentation on server and task-worker pods. | +| global.tracing.endpoint | string | `""` | OTLP endpoint. For grpc protocol, use host:port (no scheme). For http/protobuf, use a full URL. Example: "obs-tempo:4317". 
| +| global.tracing.insecure | bool | `true` | Skip TLS verification when talking to the collector. | +| global.tracing.protocol | string | `"grpc"` | OTLP protocol. One of: grpc, http/protobuf. | | infrahub-backup.backup | object | `{"enabled":false,"mode":"cronjob","schedule":"0 2 * * *","storage":{"s3":{"bucket":"","endpoint":"","prefix":"","region":"us-east-1","secretName":""},"type":"s3"}}` | Backup configuration | | infrahub-backup.enabled | bool | `false` | Whether to enable Infrahub Backup | | infrahub-backup.restore | object | `{"enabled":false,"s3":{"bucket":"","endpoint":"","key":"","region":"us-east-1","secretName":""}}` | Restore configuration | @@ -197,7 +202,7 @@ The chart offers the ability to configure persistence for the database and other | neo4j.volumes.data.mode | string | `"volume"` | | | neo4j.volumes.data.volume.emptyDir | object | `{}` | | | prefect-server.enabled | bool | `true` | | -| prefect-server.global.prefect.image.prefectTag | string | `"1.8.2"` | | +| prefect-server.global.prefect.image.prefectTag | string | `"1.9.3"` | | | prefect-server.global.prefect.image.repository | string | `"registry.opsmill.io/opsmill/infrahub"` | | | prefect-server.postgresql.enabled | bool | `true` | | | prefect-server.postgresql.image.repository | string | `"bitnamilegacy/postgresql"` | | @@ -234,6 +239,7 @@ The chart offers the ability to configure persistence for the database and other | redis.image.tag | string | `"8.2.1-debian-12-r0"` | | | redis.master.persistence.enabled | bool | `false` | | | redis.master.podLabels.infrahub/service | string | `"cache"` | | +| redis.master.resourcesPreset | string | `"medium"` | | | redis.master.service.ports.redis | int | `6379` | | | redis.nameOverride | string | `"cache"` | | | upgrade.enabled | bool | `false` | Whether to run infrahub upgrade as a post-install/pre-upgrade hook job | diff --git a/charts/infrahub/templates/_env.tpl b/charts/infrahub/templates/_env.tpl index 1754abd..0aa6e66 100644 --- 
a/charts/infrahub/templates/_env.tpl +++ b/charts/infrahub/templates/_env.tpl @@ -124,3 +124,44 @@ Define default env variables if required. value: http://{{ include "infrahub-helm.fullname" . }}-infrahub-server:8000 {{- end }} {{- end }} + +{{/* +Tracing env vars emitted onto server and task-worker pods when either +.Values.global.tracing.enabled or the infrahub-observability subchart is +enabled. The latter implies tracing on because the bundled stack ships a +Tempo collector — when observability is on and tracing.endpoint is unset, +the endpoint defaults to "<release-name>-tempo:4317". + +The INFRAHUB_TRACE_* names match upstream TraceSettings (env_prefix +INFRAHUB_TRACE_) in backend/infrahub/config.py. + +The OTEL_EXPORTER_OTLP_* names are the OpenTelemetry SDK's standard env +vars. They're emitted because upstream infrahub's create_tracer_provider() +constructs the OTLP gRPC exporter without forwarding the `insecure` setting +from INFRAHUB_TRACE_INSECURE — meaning the gRPC client defaults to TLS and +fails the handshake against a plaintext OTLP collector. Setting +OTEL_EXPORTER_OTLP_INSECURE makes the OTel SDK itself honour the setting. 
+*/}} +{{- define "infrahub-helm.tracingEnv" -}} +{{- $obsEnabled := index .Values "infrahub-observability" "enabled" -}} +{{- if or .Values.global.tracing.enabled $obsEnabled }} +{{- $endpoint := .Values.global.tracing.endpoint -}} +{{- if and (not $endpoint) $obsEnabled -}} +{{- $endpoint = printf "%s-tempo:4317" .Release.Name -}} +{{- end }} +- name: INFRAHUB_TRACE_ENABLE + value: "true" +- name: INFRAHUB_TRACE_INSECURE + value: {{ .Values.global.tracing.insecure | quote }} +- name: INFRAHUB_TRACE_EXPORTER_TYPE + value: "otlp" +- name: INFRAHUB_TRACE_EXPORTER_PROTOCOL + value: {{ .Values.global.tracing.protocol | quote }} +- name: INFRAHUB_TRACE_EXPORTER_ENDPOINT + value: {{ $endpoint | quote }} +- name: OTEL_EXPORTER_OTLP_INSECURE + value: {{ .Values.global.tracing.insecure | quote }} +- name: OTEL_EXPORTER_OTLP_TRACES_INSECURE + value: {{ .Values.global.tracing.insecure | quote }} +{{- end }} +{{- end }} diff --git a/charts/infrahub/templates/infrahub-server.yaml b/charts/infrahub/templates/infrahub-server.yaml index 6cfcdc4..7249c6e 100644 --- a/charts/infrahub/templates/infrahub-server.yaml +++ b/charts/infrahub/templates/infrahub-server.yaml @@ -59,6 +59,7 @@ spec: - args: {{- toYaml .Values.infrahubServer.infrahubServer.args | nindent 12 }} env: {{- include "infrahub-helm.infrahubServer.defaultEnv" . | nindent 12 }} + {{- include "infrahub-helm.tracingEnv" . 
| nindent 12 }} {{- range $key, $value := .Values.infrahubServer.infrahubServer.env }} {{- if or $value (not (or (eq $key "INFRAHUB_INITIAL_ADMIN_TOKEN") (eq $key "INFRAHUB_SECURITY_SECRET_KEY") (eq $key "INFRAHUB_API_TOKEN"))) }} - name: {{ $key }} diff --git a/charts/infrahub/templates/infrahub-task-worker.yaml b/charts/infrahub/templates/infrahub-task-worker.yaml index 4411577..30a808b 100644 --- a/charts/infrahub/templates/infrahub-task-worker.yaml +++ b/charts/infrahub/templates/infrahub-task-worker.yaml @@ -60,6 +60,7 @@ spec: args: {{- toYaml .Values.infrahubTaskWorker.infrahubTaskWorker.args | nindent 12 }} env: {{- include "infrahub-helm.infrahubTaskWorker.defaultEnv" . | nindent 12 }} + {{- include "infrahub-helm.tracingEnv" . | nindent 12 }} {{- range $key, $value := .Values.infrahubTaskWorker.infrahubTaskWorker.env }} {{- if or $value (not (or (eq $key "INFRAHUB_INITIAL_ADMIN_TOKEN") (eq $key "INFRAHUB_SECURITY_SECRET_KEY") (eq $key "INFRAHUB_API_TOKEN"))) }} - name: {{ $key }} diff --git a/charts/infrahub/values.yaml b/charts/infrahub/values.yaml index 2bf8120..9097778 100644 --- a/charts/infrahub/values.yaml +++ b/charts/infrahub/values.yaml @@ -14,6 +14,24 @@ global: commonAnnotations: {} # -- Labels to use for all configured pods podLabels: {} + # -- Send traces to an OTLP collector. When enabled, INFRAHUB_TRACE_* env + # vars are injected into the server and task-worker Deployments. Implied + # true when `infrahub-observability.enabled` is true. Pair with the + # infrahub-observability chart (Tempo endpoint: -tempo:4317) or + # any other OTLP-compatible collector. + tracing: + # -- Enable tracing instrumentation on server and task-worker pods. + # Implied true when `infrahub-observability.enabled` is true. + enabled: false + # -- OTLP endpoint. For grpc protocol, use host:port (no scheme). + # For http/protobuf, use a full URL. Example: "obs-tempo:4317". + # When empty and `infrahub-observability.enabled` is true, defaults to + # "-tempo:4317". 
+ endpoint: "" + # -- OTLP protocol. One of: grpc, http/protobuf. + protocol: grpc + # -- Skip TLS verification when talking to the collector. + insecure: true # ----------- Cache ----------- # Resource presets: nano, micro, small, medium, large, xlarge, 2xlarge @@ -452,3 +470,12 @@ infrahub-backup: endpoint: "" region: "us-east-1" secretName: "" + +# -------------- Infrahub Observability --------------- +# Bundled observability stack (Alloy / Loki / Tempo / Prometheus / Grafana). +# When enabled, tracing env vars are also injected on server and task-worker +# pods and default to the bundled Tempo service unless `global.tracing.endpoint` +# is set explicitly. Subchart values can be overridden under this block. +infrahub-observability: + # -- Deploy the infrahub-observability subchart and force tracing on. + enabled: false diff --git a/docs/local-testing-observability.md b/docs/local-testing-observability.md new file mode 100644 index 0000000..2e275d7 --- /dev/null +++ b/docs/local-testing-observability.md @@ -0,0 +1,479 @@ +# Local testing: infrahub + infrahub-observability + +This guide walks through installing the [infrahub](../charts/infrahub) and +[infrahub-observability](../charts/infrahub-observability) charts side-by-side +in the same namespace, wiring infrahub to send traces to Tempo, and verifying +that logs, metrics, and traces all reach Grafana. + +**Cluster-agnostic.** Any Kubernetes cluster will do (kind, minikube, k3d, +Docker Desktop, EKS/GKE/AKS, etc.) — these steps assume `kubectl` is already +pointed at the cluster you want to test against and the default +StorageClass can provision PVs (Loki, Prometheus, Tempo, Grafana, and Neo4j +all request persistent volumes by default). 
+ +## Prerequisites + +- `kubectl` configured against a working cluster +- `helm` 3.0+ +- A cluster with at least ~8 CPU and ~12 GiB of memory available — the full + stack (Neo4j, RabbitMQ, Redis, Prefect, Loki, Prometheus, Tempo, Grafana) + is not lightweight +- A working default StorageClass + +Verify: + +```sh +kubectl cluster-info +kubectl get storageclass +``` + +## 1. Create the namespace + +Both charts must live in the same namespace so that the observability chart's +service helpers resolve to the right targets. + +```sh +kubectl create namespace infrahub +``` + +## 2. Install the infrahub chart + +From the repository root: + +```sh +helm dependency update charts/infrahub +helm install infrahub charts/infrahub \ + --namespace infrahub \ + --wait --timeout 15m +``` + +Wait for the rollout to settle: + +```sh +kubectl --namespace infrahub get pods -w +``` + +You should see (eventually) all of these pods `Running` and `1/1` ready: + +- `infrahub-infrahub-server-*` +- `infrahub-infrahub-task-worker-*` (× 2 replicas) +- `infrahub-database-0` (Neo4j, headless StatefulSet) +- `infrahub-cache-master-0` (Redis) +- `infrahub-message-queue-0` (RabbitMQ) +- `infrahub-postgresql-0` (used by Prefect) +- `prefect-server-*` (note: no `infrahub-` prefix — the Prefect subchart + uses a fixed Service name `prefect-server`, which the observability chart + scrapes by that exact name) + +## 3. Install the infrahub-observability chart + +```sh +helm dependency update charts/infrahub-observability +helm install obs charts/infrahub-observability \ + --namespace infrahub \ + --wait --timeout 15m +``` + +The chart's helpers default to `global.infrahubReleaseName: infrahub` and +the current namespace, so no overrides are needed when both releases live in +the `infrahub` namespace and the infrahub release is named `infrahub`. 
+ +If you used a different release name for infrahub, override it: + +```sh +helm install obs charts/infrahub-observability \ + --namespace infrahub \ + --set global.infrahubReleaseName=<infrahub-release-name> +``` + +The post-install NOTES print the exact endpoints — re-read them later with: + +```sh +helm status obs --namespace infrahub +``` + +## 4. Wire infrahub to send traces to Tempo + +The infrahub chart exposes a `global.tracing` block that emits the +`INFRAHUB_TRACE_*` env vars on the server and task-worker Deployments. Point +it at the Tempo service the observability chart created: + +```sh +helm upgrade infrahub charts/infrahub \ + --namespace infrahub \ + --reuse-values \ + --set global.tracing.enabled=true \ + --set global.tracing.endpoint=obs-tempo:4317 \ + --set global.tracing.protocol=grpc \ + --set global.tracing.insecure=true +``` + +This triggers a rolling restart of the server and task-worker pods. Confirm +the env vars landed on a running pod: + +```sh +kubectl --namespace infrahub get pod \ + -l service=infrahub-server \ + -o jsonpath='{.items[0].spec.containers[0].env[*].name}' \ + | tr ' ' '\n' | grep INFRAHUB_TRACE +``` + +Expected: + +``` +INFRAHUB_TRACE_ENABLE +INFRAHUB_TRACE_INSECURE +INFRAHUB_TRACE_EXPORTER_TYPE +INFRAHUB_TRACE_EXPORTER_PROTOCOL +INFRAHUB_TRACE_EXPORTER_ENDPOINT +``` + +## 5. Access Grafana and verify the stack + +Port-forward Grafana: + +```sh +kubectl --namespace infrahub port-forward svc/obs-grafana 3000:80 +``` + +Look up the admin password: + +```sh +kubectl --namespace infrahub get secret obs-grafana \ + -o jsonpath="{.data.admin-password}" | base64 -d ; echo +``` + +Open http://localhost:3000 and sign in as `admin` with that password. + +### Verify datasources + +Go to **Connections → Data sources**. 
You should see three datasources +auto-provisioned by the sidecar (the chart ships a ConfigMap labelled +`grafana_datasource=1`): + +- **Prometheus** — `http://obs-prometheus-server` +- **Loki** — `http://obs-loki:3100` +- **Tempo** — `http://obs-tempo:3100` + +Click each and hit **Save & test** — all three should report healthy. + +### Verify dashboards + +Go to **Dashboards**. You should see seven dashboards provisioned (one +ConfigMap per dashboard, labelled `grafana_dashboard=1`): + +- Container Resources +- Infrahub Monitoring +- Loki Monitoring +- Neo4j Monitoring +- Prefect Flow Run Overview +- Prefect Platform Overview +- RabbitMQ Instance Monitoring + +Open **Infrahub Monitoring** — panels backed by Prometheus should populate +within a few minutes. + +### Verify logs are flowing into Loki + +In Grafana, go to **Explore**, pick the **Loki** datasource, and run: + +```logql +{namespace="infrahub"} +``` + +You should see streaming log lines from infrahub pods within ~30 seconds of +the pods generating output. Alloy (running as a DaemonSet) is scraping +`/var/log/pods` and pushing into Loki. + +### Verify metrics are flowing into Prometheus + +Note: Prometheus does no scraping itself in this stack — Alloy is the source +of truth for scrapes and pushes via remote-write — so `kubectl port-forward +svc/obs-prometheus-server 9090:80` then visiting `/targets` will show an +empty page. Verify via the metrics themselves. + +In **Explore**, pick the **Prometheus** datasource, and run: + +```promql +group by (job) ({__name__!=""}) +``` + +You should see 8 jobs: `infrahub-server`, `infrahub-worker`, `logs`, +`message-queue`, `node-exporter`, `prometheus`, `task-manager`, +`task-manager-exporter`. The `database` job is not present by default +because the Neo4j chart doesn't expose prometheus metrics (see "Service +discovery and toggles" below). 
+ +For Prefect-specific metrics: + +```promql +prefect_info_flow_runs +``` + +This series is populated by the Prefect exporter Deployment shipped by this +chart. + +### Verify traces are flowing into Tempo + +Exercise infrahub a bit so it emits spans: + +```sh +kubectl --namespace infrahub port-forward svc/infrahub-infrahub-server 8000:8000 +# in a separate terminal, hit a few endpoints +for i in $(seq 1 10); do + curl -s http://localhost:8000/api/schema/summary > /dev/null + curl -s -X POST -H "Content-Type: application/json" \ + -d '{"query":"{ Branch { edges { node { name } } } }"}' \ + http://localhost:8000/graphql > /dev/null +done +``` + +Wait ~10 seconds (Tempo batches), then in Grafana, **Explore** → **Tempo** +→ **Search**, set Service Name to `infrahub-server`, and click **Run +query**. You should see traces appear. + +You can also check from the CLI: + +```sh +kubectl --namespace infrahub port-forward svc/obs-tempo 3100:3100 +# separate terminal: +curl -s 'http://localhost:3100/api/search?tags=service.name%3Dinfrahub-server' | jq '.traces | length' +``` + +A non-zero count confirms traces are landing in Tempo. + +**Gotcha:** If you see `WRONG_VERSION_NUMBER` SSL handshake errors in the +infrahub-server logs (`kubectl logs -l service=infrahub-server`), make sure +you're running an infrahub chart that includes the `OTEL_EXPORTER_OTLP_INSECURE` +env-var fallback. Upstream infrahub's tracing wrapper doesn't honour +`INFRAHUB_TRACE_INSECURE` for the gRPC exporter — the OTel SDK env var is +what actually disables TLS. + +## 6. (Optional) Teardown + +```sh +helm uninstall obs --namespace infrahub +helm uninstall infrahub --namespace infrahub + +# Persistent volumes for Loki/Prometheus/Tempo/Grafana/Neo4j are NOT +# deleted with the releases. 
Remove them too if you want a clean slate: +kubectl --namespace infrahub delete pvc --all + +kubectl delete namespace infrahub +``` + +## Service discovery and toggles + +It's useful to know what the chart actually does for discovery — what it +scrapes by default, how it finds targets, and what the knobs are. + +### Logs — namespace-scoped pod discovery + +Alloy runs as a DaemonSet and uses `discovery.kubernetes` with +`role = "pod"`, scoped to a single namespace (the same namespace as the +sibling infrahub release, override via `global.infrahubNamespace`). **Every +pod in that namespace gets its logs shipped to Loki** — there is no label +filter on log ingestion. Pod log streams arrive in Loki with three +auto-promoted labels (`namespace`, `pod`, `container`) plus one chart- +specific label `component`, which is sourced from the `service:` pod label +that infrahub workloads carry (e.g. `service: infrahub-server`, +`service: database`). The `component` label is what the parsing pipeline +stages in the Alloy config key off of for per-workload log shape parsing. + +Toggles: + +| Setting | Effect | +| --- | --- | +| `global.infrahubNamespace` | Which namespace Alloy collects pod logs from. Empty = release namespace. | +| `alloy.enabled` | Disable the entire Alloy DaemonSet (no logs, no metric scraping). | +| `loki.enabled` | Disable Loki itself. Alloy will keep collecting but the write will fail; usually only disable both together. | + +### cAdvisor — per-container resource metrics + +Alloy scrapes the kubelet's `/metrics/cadvisor` endpoint (via the API server +proxy) once per node to collect `container_cpu_usage_seconds_total`, +`container_memory_usage_bytes`, `container_network_*`, and `container_fs_*` +series. These feed the **Container Resources** and **Neo4j Monitoring** +dashboards. + +The scrape needs `get` on `nodes/proxy` at cluster scope. The Alloy subchart +grants this by default — no extra RBAC. 
Disable via +`alloy.cadvisor.enabled: false` if your cluster policy forbids that +permission. + +**OrbStack gotcha:** OrbStack's kubelet exposes `/metrics/cadvisor` but +only emits `machine_*` metrics through it (no `container_*`). The container +data is reachable via `/stats/summary` but cAdvisor on that path is broken. +Other distributions (kind, minikube, EKS, GKE, AKS, k3d) work normally. +The dashboards expecting container metrics will be empty on OrbStack only. + +### Metrics — mostly hardcoded static targets + +For Prometheus metrics, the shipped Alloy config uses **static +`prometheus.scrape` blocks pointing at known Service:port endpoints**, not +annotation- or label-based auto-discovery. The default scrape list is: + +| Job | Target | Source chart | +| --- | --- | --- | +| `prometheus` | `-prometheus-server:80` | self | +| `infrahub-server` | `-infrahub-server:8000` | infrahub | +| `infrahub-worker` | pods labelled `service=infrahub-task-worker`, port `8000` | infrahub | +| `message-queue` | `-message-queue:15692` | infrahub (RabbitMQ exporter) | +| `database` | `-database:2004` | infrahub (Neo4j metrics) | +| `task-manager-exporter` | `-infrahub-observability-prefect-exporter:8000` | this chart (Prefect exporter) | +| `task-manager` | `-task-manager-server:4200/api/metrics` | infrahub (Prefect server) | +| `logs` | `-loki:3100` and `-alloy:12345` | self | +| `node-exporter` | `-prometheus-node-exporter:9100` | self | + +The only dynamic discovery on the metrics side is for **`infrahub-task-worker`**: +the task-worker has no Service in the infrahub chart, so Alloy uses +`discovery.kubernetes` with `role = "pod"` and a `keep` relabel filtering on +the pod's `service=infrahub-task-worker` label, then rewrites the port to +`8000`. + +#### Annotation-based scrape — not currently consumed + +The Prefect exporter Service the chart creates carries the conventional +`prometheus.io/scrape: "true"`, `prometheus.io/port`, `prometheus.io/path` +annotations. 
**The shipped Alloy config does not consume those annotations** +— the exporter is scraped via a static target block instead. The annotations +are decorative for now (useful if someone runs their own Prometheus alongside +that does honour them, but Alloy ignores them). If you want to enable +annotation-driven scrape across the whole namespace, you need to override the +Alloy config; see below. + +Toggles: + +| Setting | Effect | +| --- | --- | +| `prefectExporter.enabled` | Toggle the Prefect prometheus exporter Deployment + Service. Disable if you don't run Prefect / task-manager. | +| `prometheus-node-exporter.enabled` | Toggle the host-level metrics DaemonSet. | +| `prometheus.enabled` | Disable the in-cluster Prometheus TSDB entirely. Alloy will then have nowhere to remote-write metrics. | +| `global.infrahubReleaseName` | Used to resolve the `<infrahub-release>-*` Service names. Set this if your infrahub release isn't named `infrahub`. | + +#### Adding or removing scrape targets + +The Alloy config is shipped as a ConfigMap rendered from +[templates/alloy-config.yaml](../charts/infrahub-observability/templates/alloy-config.yaml). +The scrape list is part of that template — it isn't exposed as a Helm value. +If you need to add custom targets or switch to annotation-based discovery: + +1. **Quick override** — disable the chart's ConfigMap and provide your own: + + ```yaml + alloy: + alloy: + configMap: + create: true # let the Alloy subchart create + own the ConfigMap + content: |- + // ...your custom config.alloy... + ``` + +2. **Fork the chart's ConfigMap** — copy the rendered `obs-alloy` ConfigMap, + add your scrape blocks, and set + `alloy.alloy.configMap.name=<your-configmap-name>` to point Alloy at it instead. + +### Traces — opt-in, no discovery involved + +Traces are not collected via discovery. Workloads have to actively push to +Tempo's OTLP endpoint. For infrahub this is wired via the new +`global.tracing.*` block on the infrahub chart (see Step 4 above). 
For +custom workloads, point your OTLP client at +`<obs-release>-tempo:4317` (gRPC) or `<obs-release>-tempo:4318` (HTTP). + +Toggles: + +| Setting | Effect | +| --- | --- | +| `tempo.enabled` | Disable Tempo. | +| `global.tracing.enabled` (on the **infrahub** chart) | Inject `INFRAHUB_TRACE_*` env vars on infrahub server + task-worker. Off by default. | +| `global.tracing.endpoint` / `.protocol` / `.insecure` (infrahub chart) | OTLP destination, transport, plaintext (no TLS) connection. | + +### Per-component on/off summary + +If you only want a subset of the stack, the top-level subchart toggles let +you trim it down: + +```yaml +alloy: + enabled: false # collector +loki: + enabled: false # log storage +tempo: + enabled: false # trace storage +prometheus: + enabled: false # metric storage +grafana: + enabled: false # UI +prometheus-node-exporter: + enabled: false # host metrics +prefectExporter: + enabled: false # Prefect metrics exporter +``` + +Most users keep all of these on. Disabling Grafana while keeping the rest is +the common pattern when you have an existing org-wide Grafana you want to +point at this stack's Loki/Prometheus/Tempo. + +## Troubleshooting + +**Grafana dashboards or datasources don't appear** + +The sidecar watches all namespaces by default (`sidecar.dashboards.searchNamespace: ALL`). 
+Confirm the ConfigMaps exist with the right labels: + +```sh +kubectl --namespace infrahub get configmap -l grafana_dashboard=1 +kubectl --namespace infrahub get configmap -l grafana_datasource=1 +``` + +Then check sidecar logs: + +```sh +kubectl --namespace infrahub logs deploy/obs-grafana -c grafana-sc-dashboard +kubectl --namespace infrahub logs deploy/obs-grafana -c grafana-sc-datasources +``` + +**Alloy isn't sending data** + +```sh +kubectl --namespace infrahub get pod -l app.kubernetes.io/name=alloy +kubectl --namespace infrahub logs ds/obs-alloy +``` + +The Alloy config comes from the `obs-alloy` ConfigMap (rendered by +[templates/alloy-config.yaml](../charts/infrahub-observability/templates/alloy-config.yaml)) — +inspect it with `kubectl get configmap obs-alloy -o yaml`. + +**Tempo can't ingest traces** + +Confirm the OTLP gRPC receiver is up: + +```sh +kubectl --namespace infrahub port-forward svc/obs-tempo 4317:4317 +# from another terminal, check the port is reachable: +nc -zv localhost 4317 +``` + +And that infrahub-server has the trace env vars (Step 4 above). + +**Pods are pending due to PVCs** + +Your default StorageClass may not be provisioning. 
Check: + +```sh +kubectl get pvc --namespace infrahub +kubectl describe pvc --namespace infrahub +``` + +Either install a default StorageClass (kind: `kubectl apply -f https://...local-path-provisioner.yaml`) +or disable persistence in the chart values for a one-off test: + +```sh +helm install obs charts/infrahub-observability \ + --namespace infrahub \ + --set loki.singleBinary.persistence.enabled=false \ + --set tempo.persistence.enabled=false \ + --set prometheus.server.persistentVolume.enabled=false \ + --set grafana.persistence.enabled=false +``` diff --git a/docs/plans/2026-05-12-infrahub-observability-chart.md b/docs/plans/2026-05-12-infrahub-observability-chart.md new file mode 100644 index 0000000..a9812d6 --- /dev/null +++ b/docs/plans/2026-05-12-infrahub-observability-chart.md @@ -0,0 +1,677 @@ +# Infrahub Observability Chart Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use `superpowers:executing-plans` to implement this plan task-by-task. + +**Goal:** Finish the new `infrahub-observability` Helm chart so it deploys the same observability stack that upstream `opsmill/infrahub` ships for local Docker Compose dev (Alloy + Loki + Prometheus + Tempo + Grafana + Prefect exporter) onto Kubernetes alongside the existing `infrahub` / `infrahub-enterprise` charts. + +**Architecture:** +- Subchart dependencies are already declared in [charts/infrahub-observability/Chart.yaml](../../charts/infrahub-observability/Chart.yaml). +- This chart's own templates supply: an Alloy `config.alloy` ConfigMap (so we can ship our pipelines instead of the subchart's auto-config), Grafana datasource and dashboard ConfigMaps consumed by Grafana's sidecar, and a Deployment+Service for `prefecthq/prometheus-prefect-exporter` (no upstream chart exists). 
+- URL/name helpers are already defined in [charts/infrahub-observability/templates/_helpers.tpl](../../charts/infrahub-observability/templates/_helpers.tpl) — reuse them (`infrahub-observability.prometheusRemoteWriteUrl`, `lokiPushUrl`, `tempoOtlpGrpcEndpoint`, `alloyConfigMapName`, `prefectExporterFullname`, `prefectApiUrl`). +- Label/annotation pattern to mirror: see [charts/infrahub/templates/infrahub-server.yaml](../../charts/infrahub/templates/infrahub-server.yaml) — top-level `service: ` label plus the chart's `labels` / `selectorLabels` / `annotations` helpers. + +**Upstream source of truth** for Alloy config and Grafana provisioning files: `opsmill/infrahub` repo at ref `infrahub-v1.9.3` (matches [.dashboards-source](../../charts/infrahub-observability/.dashboards-source) and `Chart.yaml`'s `appVersion`). Likely paths: +- `development/alloy/config.alloy` (Alloy pipelines) +- `development/grafana/provisioning/datasources/datasources.yaml` (Grafana datasources) +- `development/grafana/provisioning/dashboards/` (already vendored) + +Verify exact paths with: `gh api repos/opsmill/infrahub/contents/development?ref=infrahub-v1.9.3 --jq '.[].name'` + +**Tech Stack:** Helm 3, Grafana Alloy 1.0.3, Loki 6.16.0, Tempo 1.10.0, Grafana 8.5.0, Prometheus 25.27.0, prometheus-node-exporter 4.36.0. + +**Branch state at plan time:** `feat/infrahub-observability-chart` has no commits yet — everything under [charts/infrahub-observability/](../../charts/infrahub-observability/), [scripts/](../../scripts/), [Makefile](../../Makefile), and `docs/` is untracked. Commit incrementally per task. 
+ +--- + +### Task 1: Add Alloy `config.alloy` ConfigMap + +**Files:** +- Create: `charts/infrahub-observability/templates/alloy-config.yaml` + +**Step 1: Fetch the upstream Alloy config** + +```bash +gh api repos/opsmill/infrahub/contents/development/alloy/config.alloy?ref=infrahub-v1.9.3 \ + --jq '.content' | base64 -d > /tmp/upstream-config.alloy +``` + +If the path differs, list candidates: `gh api repos/opsmill/infrahub/contents/development?ref=infrahub-v1.9.3 --jq '.[].name'`. + +**Step 2: Adapt the config for Kubernetes** + +In the upstream Docker Compose config, endpoints are container names like `loki:3100`. For Kubernetes, rewrite endpoints using the helpers so they resolve to the right Service: +- Prometheus remote_write URL → `{{ include "infrahub-observability.prometheusRemoteWriteUrl" . }}` +- Loki push URL → `{{ include "infrahub-observability.lokiPushUrl" . }}` +- Tempo OTLP gRPC → `{{ include "infrahub-observability.tempoOtlpGrpcEndpoint" . }}` + +Log-discovery and pod-scrape blocks should use `discovery.kubernetes` (component name varies by Alloy version — confirm with the Alloy 1.0.3 reference) instead of the Docker container discovery used upstream. Scope discovery to `{{ include "infrahub-observability.infrahubNamespace" . }}` so we only collect from the sibling infrahub release. + +**Step 3: Write the ConfigMap template** + +```yaml +{{- if .Values.alloy.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "infrahub-observability.alloyConfigMapName" . }} + namespace: {{ .Release.Namespace | quote }} + labels: + service: alloy + {{- include "infrahub-observability.labels" . | nindent 4 }} + annotations: + {{- include "infrahub-observability.annotations" . | nindent 4 }} +data: + config.alloy: |- + {{- /* adapted config goes here, indented 4 */ -}} +{{- end }} +``` + +Note: `values.yaml` already sets `alloy.alloy.configMap.create: false` and `alloy.alloy.configMap.key: config.alloy`. 
We also need to set `alloy.alloy.configMap.name` to the rendered helper value — do this in [charts/infrahub-observability/values.yaml](../../charts/infrahub-observability/values.yaml) by changing the `name: ""` line (currently L42) to a templated reference. The Alloy subchart resolves a `name: ""` by trying to render it as a template, so emit: + +```yaml + name: '{{ printf "%s-alloy" (include "infrahub-observability.fullname" .) }}' +``` + +(Confirm this against the Alloy subchart's `_helpers.tpl` — if it doesn't template the `name` field, document this in `NOTES.txt` and require users to pass `alloy.alloy.configMap.name` explicitly, OR switch to providing the config via `alloy.configMap.content`.) + +**Step 4: Validate** + +```bash +make deps-observability +helm template test charts/infrahub-observability | yq 'select(.kind == "ConfigMap" and (.metadata.name | test("alloy")))' +``` + +Expected: the ConfigMap renders with a non-empty `config.alloy` key and the Alloy DaemonSet mounts it. + +**Step 5: Commit** + +```bash +git add charts/infrahub-observability/templates/alloy-config.yaml charts/infrahub-observability/values.yaml +git commit -m "feat(observability): ship Alloy config.alloy as ConfigMap" +``` + +--- + +### Task 2: Add Grafana datasources ConfigMap + +**Files:** +- Create: `charts/infrahub-observability/templates/grafana-datasources.yaml` + +**Step 1: Fetch the upstream Grafana datasources config** + +```bash +gh api repos/opsmill/infrahub/contents/development/grafana/provisioning/datasources?ref=infrahub-v1.9.3 --jq '.[].name' +gh api repos/opsmill/infrahub/contents/development/grafana/provisioning/datasources/datasources.yaml?ref=infrahub-v1.9.3 \ + --jq '.content' | base64 -d > /tmp/upstream-datasources.yaml +``` + +**Step 2: Write the ConfigMap, rewriting URLs via helpers** + +Replace upstream `http://prometheus:9090` / `http://loki:3100` / `http://tempo:3200` with the helpers. 
The sidecar (already enabled at [values.yaml:192-196](../../charts/infrahub-observability/values.yaml#L192-L196) with label `grafana_datasource: "1"`) auto-loads any ConfigMap with that label. + +```yaml +{{- if .Values.grafana.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "infrahub-observability.fullname" . }}-grafana-datasources + namespace: {{ .Release.Namespace | quote }} + labels: + grafana_datasource: "1" + service: grafana + {{- include "infrahub-observability.labels" . | nindent 4 }} + annotations: + {{- include "infrahub-observability.annotations" . | nindent 4 }} +data: + datasources.yaml: |- + apiVersion: 1 + datasources: + - name: Prometheus + type: prometheus + access: proxy + url: {{ include "infrahub-observability.prometheusUrl" . }} + isDefault: true + jsonData: + httpMethod: POST + timeInterval: 30s + - name: Loki + type: loki + access: proxy + url: {{ include "infrahub-observability.lokiUrl" . }} + jsonData: + maxLines: 1000 + - name: Tempo + type: tempo + access: proxy + url: {{ include "infrahub-observability.tempoUrl" . }} + jsonData: + tracesToLogsV2: + datasourceUid: loki +{{- end }} +``` + +Cross-check the `jsonData` blocks against the upstream `datasources.yaml` and bring over anything we're missing (UID-based linking between Tempo and Loki, exemplar config, etc.). + +**Step 3: Validate** + +```bash +helm template test charts/infrahub-observability | yq 'select(.kind == "ConfigMap" and (.metadata.labels.grafana_datasource? == "1"))' +``` + +Expected: one ConfigMap with three datasources. 
+ +**Step 4: Commit** + +```bash +git add charts/infrahub-observability/templates/grafana-datasources.yaml +git commit -m "feat(observability): provision Grafana datasources via sidecar ConfigMap" +``` + +--- + +### Task 3: Add Grafana dashboard ConfigMaps (one per dashboard) + +**Files:** +- Create: `charts/infrahub-observability/templates/grafana-dashboards.yaml` + +**Why one ConfigMap per dashboard:** etcd's hard limit on a single ConfigMap is 1 MiB. The vendored JSONs total ~917 KB and [dashboards/loki_monitoring.json](../../charts/infrahub-observability/dashboards/loki_monitoring.json) alone is 389 KB — bundling all seven into a single ConfigMap would risk hitting the limit and breaking installs. Iterating with `Files.Glob` produces one ConfigMap per file. + +**Step 1: Write the template** + +```yaml +{{- if .Values.grafana.enabled }} +{{- $root := . }} +{{- range $path, $_ := .Files.Glob "dashboards/*.json" }} +{{- $name := base $path | trimSuffix ".json" }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "infrahub-observability.fullname" $root }}-dashboard-{{ $name | replace "_" "-" }} + namespace: {{ $root.Release.Namespace | quote }} + labels: + grafana_dashboard: "1" + service: grafana + {{- include "infrahub-observability.labels" $root | nindent 4 }} + annotations: + {{- include "infrahub-observability.annotations" $root | nindent 4 }} +data: + {{ base $path }}: |- + {{- $root.Files.Get $path | nindent 4 }} +{{- end }} +{{- end }} +``` + +**Step 2: Validate** + +```bash +helm template test charts/infrahub-observability | yq 'select(.kind == "ConfigMap" and (.metadata.labels.grafana_dashboard? == "1")) | .metadata.name' +``` + +Expected: 7 ConfigMap names (one per file in [dashboards/](../../charts/infrahub-observability/dashboards/)). Also confirm each rendered ConfigMap stays under 1 MiB: + +```bash +helm template test charts/infrahub-observability \ + | yq 'select(.kind == "ConfigMap" and (.metadata.labels.grafana_dashboard? 
== "1")) | [.metadata.name, (.data | to_entries | .[0].value | length)] | @tsv' +``` + +**Step 3: Commit** + +```bash +git add charts/infrahub-observability/templates/grafana-dashboards.yaml +git commit -m "feat(observability): provision vendored Grafana dashboards via sidecar ConfigMaps" +``` + +--- + +### Task 4: Add Prefect exporter Deployment + +**Files:** +- Create: `charts/infrahub-observability/templates/prefect-exporter-deployment.yaml` + +**Step 1: Write the template** + +Mirror the pattern from [charts/infrahub/templates/infrahub-server.yaml](../../charts/infrahub/templates/infrahub-server.yaml) (top-level `service:` label + chart helpers). Image/log-level/replicas/probes come from `values.yaml`. The exporter listens on `:8000` by default and exposes `/metrics`. + +```yaml +{{- if .Values.prefectExporter.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "infrahub-observability.prefectExporterFullname" . }} + namespace: {{ .Release.Namespace | quote }} + labels: + service: prefect-exporter + {{- include "infrahub-observability.labels" . | nindent 4 }} + annotations: + {{- include "infrahub-observability.annotations" . | nindent 4 }} +spec: + replicas: {{ .Values.prefectExporter.replicas }} + selector: + matchLabels: + service: prefect-exporter + {{- include "infrahub-observability.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + service: prefect-exporter + {{- include "infrahub-observability.selectorLabels" . | nindent 8 }} + {{- with .Values.prefectExporter.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.global.imagePullSecrets }} + imagePullSecrets: {{- toYaml . 
| nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.prefectExporter.securityContext | nindent 8 }} + containers: + - name: prefect-exporter + image: "{{ .Values.prefectExporter.image.repository }}:{{ .Values.prefectExporter.image.tag }}" + imagePullPolicy: {{ default .Values.global.imagePullPolicy .Values.prefectExporter.image.pullPolicy }} + env: + - name: PREFECT_API_URL + value: {{ include "infrahub-observability.prefectApiUrl" . | quote }} + - name: LOG_LEVEL + value: {{ .Values.prefectExporter.logLevel | quote }} + ports: + - name: metrics + containerPort: 8000 + protocol: TCP + readinessProbe: + httpGet: + path: /metrics + port: metrics + initialDelaySeconds: 10 + periodSeconds: 30 + resources: + {{- toYaml .Values.prefectExporter.resources | nindent 12 }} + {{- with .Values.prefectExporter.nodeSelector }} + nodeSelector: {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prefectExporter.tolerations }} + tolerations: {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prefectExporter.affinity }} + affinity: {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} +``` + +Verify the actual env-var names against the `prefecthq/prometheus-prefect-exporter` image docs at `docker.io/prefecthq/prometheus-prefect-exporter` (README on Docker Hub) before committing — `PREFECT_API_URL` and `LOG_LEVEL` are the documented names as of v3.x but confirm. + +**Step 2: Validate** + +```bash +helm template test charts/infrahub-observability | yq 'select(.kind == "Deployment" and (.metadata.name | test("prefect-exporter")))' +``` + +Expected: Deployment with PREFECT_API_URL pointing at `http://infrahub-task-manager-server:4200/api`. 
+ +**Step 3: Commit** + +```bash +git add charts/infrahub-observability/templates/prefect-exporter-deployment.yaml +git commit -m "feat(observability): add Prefect prometheus exporter Deployment" +``` + +--- + +### Task 5: Add Prefect exporter Service + +**Files:** +- Create: `charts/infrahub-observability/templates/prefect-exporter-service.yaml` + +**Step 1: Write the template** + +Annotate with `prometheus.io/scrape: "true"` so Alloy's annotation-based service discovery picks it up automatically. + +```yaml +{{- if .Values.prefectExporter.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "infrahub-observability.prefectExporterFullname" . }} + namespace: {{ .Release.Namespace | quote }} + labels: + service: prefect-exporter + {{- include "infrahub-observability.labels" . | nindent 4 }} + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: {{ .Values.prefectExporter.service.port | quote }} + prometheus.io/path: "/metrics" + {{- include "infrahub-observability.annotations" . | nindent 4 }} +spec: + type: {{ .Values.prefectExporter.service.type }} + ports: + - name: metrics + port: {{ .Values.prefectExporter.service.port }} + targetPort: metrics + protocol: TCP + selector: + service: prefect-exporter + {{- include "infrahub-observability.selectorLabels" . | nindent 4 }} +{{- end }} +``` + +**Step 2: Validate** + +```bash +helm template test charts/infrahub-observability | yq 'select(.kind == "Service" and (.metadata.name | test("prefect-exporter")))' +``` + +Expected: Service on port 8000, selector matches the Deployment from Task 4. 
+ +**Step 3: Commit** + +```bash +git add charts/infrahub-observability/templates/prefect-exporter-service.yaml +git commit -m "feat(observability): expose Prefect exporter via Service with scrape annotations" +``` + +--- + +### Task 6: Add `NOTES.txt` + +**Files:** +- Create: `charts/infrahub-observability/templates/NOTES.txt` + +**Step 1: Write install notes** + +Cover the top three questions a user will have after `helm install`: + +1. How to port-forward Grafana (`kubectl port-forward svc/{{ .Release.Name }}-grafana 3000:80`) and the default admin password (look up via `kubectl get secret`). +2. How to point the sibling infrahub release at this Tempo (the `global.tracing.endpoint` snippet from [README.md.gotmpl:32-41](../../charts/infrahub-observability/README.md.gotmpl#L32-L41)). +3. The OTLP gRPC endpoint, Prometheus remote-write URL, and Loki push URL — render via the helpers so the user gets the exact in-cluster DNS name for their release. + +Keep it under ~40 lines. Use `printf` / helper expressions for any release-name-dependent strings. + +**Step 2: Validate** + +```bash +helm install --dry-run test charts/infrahub-observability | sed -n '/^NOTES:/,$p' +``` + +Expected: notes render with the actual release name substituted. + +**Step 3: Commit** + +```bash +git add charts/infrahub-observability/templates/NOTES.txt +git commit -m "feat(observability): add post-install NOTES with port-forward and wiring tips" +``` + +--- + +### Task 7: Add `global.tracing` to the infrahub chart (cross-chart) + +**Why this lives in the infrahub chart, not the observability chart:** the env vars need to land on the infrahub server and task-worker pods. The observability chart only provides the collector — it can't reach across releases to inject env on someone else's Deployment. So we add a single `global.tracing` block to the infrahub chart that emits the OTEL/INFRAHUB_TRACE env vars to both workloads. 
Defaults to `enabled: false` so existing users without an OTLP collector aren't affected; observability users flip one flag. + +**Files:** +- Modify: [charts/infrahub/values.yaml](../../charts/infrahub/values.yaml) +- Modify: [charts/infrahub/templates/_env.tpl](../../charts/infrahub/templates/_env.tpl) +- Modify: [charts/infrahub/templates/infrahub-server.yaml](../../charts/infrahub/templates/infrahub-server.yaml) +- Modify: [charts/infrahub/templates/infrahub-task-worker.yaml](../../charts/infrahub/templates/infrahub-task-worker.yaml) +- Possibly modify: [charts/infrahub-enterprise/values.yaml](../../charts/infrahub-enterprise/values.yaml) and any preset values files (only if they shadow `global:`) +- Modify: [charts/infrahub-observability/README.md.gotmpl](../../charts/infrahub-observability/README.md.gotmpl) so the wiring snippet matches reality + +**Step 1: Verify the exact env-var names against upstream** + +The README.md.gotmpl currently uses placeholder names. Look up what `opsmill/infrahub@infrahub-v1.9.3` actually reads: + +```bash +# Search the upstream Python codebase for the env vars it consumes +gh search code --repo opsmill/infrahub --filename "*.py" "INFRAHUB_TRACE" +gh search code --repo opsmill/infrahub --filename "*.py" "OTEL_EXPORTER_OTLP_ENDPOINT" +# Also check the Docker Compose dev setup for the names it sets: +gh api repos/opsmill/infrahub/contents/development?ref=infrahub-v1.9.3 --jq '.[] | select(.name | test("compose|env")) | .name' +``` + +Capture the *exact* names — likely `INFRAHUB_TRACE_ENABLED`, `INFRAHUB_TRACE_EXPORTER_PROTOCOL`, `INFRAHUB_TRACE_EXPORTER_ENDPOINT`, `INFRAHUB_TRACE_INSECURE`, but **do not assume** — use whatever the upstream code actually reads. If both `INFRAHUB_TRACE_*` and `OTEL_*` are needed, emit both. 
+ +**Step 2: Add `global.tracing` to `charts/infrahub/values.yaml`** + +Add to the `global:` block at [charts/infrahub/values.yaml:2-16](../../charts/infrahub/values.yaml#L2-L16): + +```yaml + # -- Send traces to an OTLP collector. When enabled, OTEL/Infrahub trace env + # vars are injected into the server and task-worker Deployments. Pair with + # the infrahub-observability chart (Tempo endpoint: <release>-tempo:4317) + # or any other OTLP-compatible collector. + tracing: + # -- Enable tracing instrumentation on server and task-worker pods. + enabled: false + # -- OTLP endpoint. For grpc protocol, use host:port (no scheme). + # For http/protobuf, use a full URL. Example: "obs-tempo:4317". + endpoint: "" + # -- OTLP protocol. One of: grpc, http/protobuf. + protocol: grpc + # -- Skip TLS verification when talking to the collector. + insecure: true +``` + +Keep `enabled: false`. Leave the block uncommented so it shows up in helm-docs and is one `--set` away. + +**Step 3: Add a `tracingEnv` helper to `_env.tpl`** + +In [charts/infrahub/templates/_env.tpl](../../charts/infrahub/templates/_env.tpl), add a reusable define near the bottom: + +```gotmpl +{{/* +Tracing env vars emitted onto server and task-worker pods when +.Values.global.tracing.enabled is true. Use the exact env-var names +confirmed in Step 1. +*/}} +{{- define "infrahub-helm.tracingEnv" -}} +{{- if .Values.global.tracing.enabled }} +- name: INFRAHUB_TRACE_ENABLED + value: "true" +- name: INFRAHUB_TRACE_EXPORTER_PROTOCOL + value: {{ .Values.global.tracing.protocol | quote }} +- name: INFRAHUB_TRACE_EXPORTER_ENDPOINT + value: {{ .Values.global.tracing.endpoint | quote }} +- name: INFRAHUB_TRACE_INSECURE + value: {{ .Values.global.tracing.insecure | quote }} +{{- end }} +{{- end }} +``` + +Adjust names to whatever Step 1 turned up. If upstream also reads stock OTEL SDK vars, emit those alongside (e.g., `OTEL_EXPORTER_OTLP_ENDPOINT`, `OTEL_EXPORTER_OTLP_PROTOCOL`, `OTEL_SERVICE_NAME` set to the workload name).
+ +**Step 4: Include the helper in server + task-worker templates** + +Find the `env:` block in [infrahub-server.yaml](../../charts/infrahub/templates/infrahub-server.yaml) (around L120) and append the helper output after the existing default env: + +```gotmpl + env: + {{- include "infrahub-helm.infrahubServer.defaultEnv" . | nindent 12 }} + {{- include "infrahub-helm.tracingEnv" . | nindent 12 }} + {{- range $key, $value := .Values.infrahubServer.infrahubServer.env }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} +``` + +Do the same in [infrahub-task-worker.yaml](../../charts/infrahub/templates/infrahub-task-worker.yaml). The existing user-supplied `env:` map should still take precedence — keep it last so a user can override a tracing var explicitly if needed. + +**Step 5: Update the observability chart README to point at the real keys** + +Replace the snippet at [charts/infrahub-observability/README.md.gotmpl:30-41](../../charts/infrahub-observability/README.md.gotmpl#L30-L41) so it reflects the actual key shape and uses `host:port` for the grpc endpoint: + +```yaml +# infrahub values +global: + tracing: + enabled: true + endpoint: "obs-tempo:4317" # <release>-tempo:4317 + protocol: grpc + insecure: true +``` + +**Step 6: Add the one-liner to the observability NOTES.txt (from Task 6)** + +When Task 6 is being executed, include a section like: + +``` +2. Send traces from infrahub to this stack: + + helm upgrade {{ .Values.global.infrahubReleaseName }} ./charts/infrahub \ + --reuse-values \ + --set global.tracing.enabled=true \ + --set global.tracing.endpoint={{ include "infrahub-observability.tempoOtlpGrpcEndpoint" . }} +``` + +This is the entire "wiring" step from the user's perspective. If Task 6 has already been committed, treat this as an amendment to NOTES.txt in this task.
+ +**Step 7: Validate** + +```bash +# Render with tracing off — no INFRAHUB_TRACE_* vars on server or task-worker +helm template test charts/infrahub | yq 'select(.kind == "Deployment") | .spec.template.spec.containers[0].env' | grep -i trace || echo "OK: no tracing env" + +# Render with tracing on — vars appear on both deployments +helm template test charts/infrahub \ + --set global.tracing.enabled=true \ + --set global.tracing.endpoint=obs-tempo:4317 \ + | yq 'select(.kind == "Deployment") | {(.metadata.name): [.spec.template.spec.containers[0].env[] | select(.name | test("TRACE|OTEL"))]}' + +# Re-lint both base + enterprise (enterprise inherits the global block) +helm lint charts/infrahub +helm lint charts/infrahub-enterprise +``` + +Expected: both `infrahub-server` and `infrahub-task-worker` Deployments emit the trace env vars when enabled; both lints clean. + +**Step 8: Commit** + +```bash +git add charts/infrahub/values.yaml \ + charts/infrahub/templates/_env.tpl \ + charts/infrahub/templates/infrahub-server.yaml \ + charts/infrahub/templates/infrahub-task-worker.yaml \ + charts/infrahub-observability/README.md.gotmpl \ + charts/infrahub-observability/templates/NOTES.txt +git commit -m "feat(infrahub): add global.tracing for OTLP collector wiring + +Adds an opt-in global.tracing block on the infrahub chart that emits +INFRAHUB_TRACE_* (and OTEL_*) env vars onto the server and task-worker +Deployments. Defaults to disabled so existing users are unaffected; +users of the new infrahub-observability chart can flip it on with one +flag and point at the chart's Tempo endpoint." 
+``` + +--- + +### Task 8: Wire the chart into CI + +**Files:** +- Modify: [.github/workflows/ci.yml](../../.github/workflows/ci.yml) + +**Step 1: Add lint step** + +After the existing `Linting: helm lint infrahub enterprise` step at [.github/workflows/ci.yml:31-32](../../.github/workflows/ci.yml#L31-L32), append: + +```yaml + - name: "Updating dependencies: infrahub-observability" + run: "helm dependency update charts/infrahub-observability" + - name: "Linting: helm lint infrahub-observability" + run: "helm lint charts/infrahub-observability" +``` + +**Step 2: Validate** + +```bash +yamllint .github/workflows/ci.yml +make lint +``` + +Expected: yamllint passes, `make lint` succeeds for all three charts. + +**Step 3: Commit** + +```bash +git add .github/workflows/ci.yml +git commit -m "ci: lint infrahub-observability chart in CI" +``` + +--- + +### Task 9: Render `README.md` from gotmpl + final end-to-end verification + +**Files:** +- Create: `charts/infrahub-observability/README.md` (generated) + +**Step 1: Render README via helm-docs** + +```bash +# https://github.com/norwoodj/helm-docs +helm-docs --chart-search-root=charts/infrahub-observability +``` + +If `helm-docs` isn't installed, document the command in `Makefile` and skip the actual render until CI handles it. Check whether the other charts have a committed `README.md` — if so, follow that convention; if not, leave it gotmpl-only. + +**Step 2: Final end-to-end checks** + +```bash +make deps-observability +make lint-observability +make template-observability > /tmp/rendered.yaml +# Spot-check: every Kubernetes object renders, no template errors +yq '.kind' /tmp/rendered.yaml | sort | uniq -c +# Spot-check: no dashboard ConfigMap exceeds 1 MiB +yq 'select(.kind == "ConfigMap" and (.metadata.labels.grafana_dashboard? 
== "1")) | [.metadata.name, (.data | to_entries | .[0].value | length)] | @tsv' /tmp/rendered.yaml +# Optional: dry-run install against a kind cluster if available +helm install --dry-run --debug test charts/infrahub-observability > /dev/null +``` + +Expected: zero `helm lint` errors, all helpers resolve, dashboard ConfigMap sizes well under 1048576. + +**Step 3: Commit** + +```bash +git add charts/infrahub-observability/README.md # if generated +git commit -m "docs(observability): render README from gotmpl" +``` + +--- + +### Task 10: Open the PR + +**Step 1: Push and open PR** + +```bash +git push -u origin feat/infrahub-observability-chart +gh pr create --base stable --title "feat: add infrahub-observability chart" --body "$(cat <<'EOF' +## Summary +- New `infrahub-observability` Helm chart bundling Alloy + Loki + Tempo + Prometheus + Grafana + Prefect exporter for Kubernetes installs. +- Vendors seven Grafana dashboards from `opsmill/infrahub@infrahub-v1.9.3` via [scripts/sync-dashboards.sh](../../scripts/sync-dashboards.sh). +- Provisions Grafana datasources and dashboards via sidecar ConfigMaps; ships an in-chart Deployment+Service for the Prefect prometheus exporter (no upstream chart exists). +- Adds an opt-in `global.tracing` block to the infrahub chart so users can wire OTLP/INFRAHUB_TRACE env vars onto server + task-worker with a single flag. +- CI now lints all three charts. 
+ +## Test plan +- [ ] `make lint` passes locally +- [ ] `helm template test charts/infrahub-observability` renders without errors +- [ ] All dashboard ConfigMaps stay under 1 MiB +- [ ] Manual install into a kind cluster shows Grafana with all three datasources reachable and all dashboards visible +- [ ] Wire infrahub `global.tracing.endpoint` at the chart's Tempo and confirm traces appear +EOF +)" +``` + +**Step 2: Verify** + +```bash +gh pr view --web +``` + +--- + +## Known unknowns to investigate during execution + +- **Alloy subchart `configMap.name` templating:** confirm whether the Alloy 1.0.3 subchart renders `alloy.alloy.configMap.name` as a template (Task 1 Step 3). If not, switch strategy. +- **Upstream `development/` paths:** Task 1 and Task 2 both assume specific paths under `opsmill/infrahub@infrahub-v1.9.3` — verify with `gh api` before fetching. +- **Prefect exporter env-var names:** Task 4 assumes `PREFECT_API_URL` and `LOG_LEVEL` — confirm against the image docs. +- **Infrahub tracing env-var names:** Task 7 assumes `INFRAHUB_TRACE_*` — confirm the exact names (and whether stock `OTEL_*` SDK vars are also needed) by searching the upstream infrahub Python source at ref `infrahub-v1.9.3` before writing the helper. +- **helm-docs availability:** Task 9 assumes `helm-docs` is installable; if not, leave README rendering for a follow-up. diff --git a/docs/plans/2026-05-14-dashboard-k8s-adaptation-design.md b/docs/plans/2026-05-14-dashboard-k8s-adaptation-design.md new file mode 100644 index 0000000..5d333bc --- /dev/null +++ b/docs/plans/2026-05-14-dashboard-k8s-adaptation-design.md @@ -0,0 +1,160 @@ +# Dashboard Kubernetes adaptation — design + +**Status:** Approved, ready for implementation +**Date:** 2026-05-14 +**Branch:** `feat/infrahub-observability-chart` + +## Problem + +`infrahub-observability` ships seven Grafana dashboards vendored from +`opsmill/infrahub` (a docker-compose dev stack). 
Two of them — +`container_resources` and `neo4j_monitoring` — have panels that show no data +in Kubernetes because: + +1. **No `container_*` metrics are collected.** Upstream runs a standalone + cAdvisor container; our chart's Alloy config doesn't scrape the K8s + equivalent (kubelet `/metrics/cadvisor`). Prometheus has zero + `container_*` series. +2. **The dashboards filter by Docker-only labels.** Queries use + `container_label_com_docker_compose_service` and + `container_label_com_docker_compose_project`, which only exist when + cAdvisor scrapes a Docker daemon. Even if we collected the metrics, the + filters would never match. + +Both issues need fixing, and the fix must survive future upstream syncs — +the dashboards are re-pulled by `scripts/sync-dashboards.sh` whenever +upstream cuts a new infrahub release. + +## Goals + +- `container_*` metrics flow into Prometheus. +- Container Resources and Neo4j Monitoring dashboards render real data. +- The fix survives `make sync-dashboards REF=…`. +- CI catches regressions when upstream or our chart drifts. + +## Non-goals + +- Replacing the upstream dashboards with a different design. +- Wholesale switch to kube-prometheus-stack. +- Per-cluster customisation of the dashboards beyond what upstream allows. + +## Design + +### 1. Collection: scrape kubelet cAdvisor + +Add a `prometheus.scrape "cadvisor"` block to +`charts/infrahub-observability/templates/alloy-config.yaml`. The scrape +target is the API server's node-proxy endpoint +(`/api/v1/nodes/<node>/proxy/metrics/cadvisor`), one per node, discovered +via `discovery.kubernetes` with `role = "node"`. TLS uses the in-cluster +CA; auth uses the Alloy ServiceAccount bearer token. + +A new template `templates/alloy-cadvisor-rbac.yaml` provisions a +ClusterRole granting `get` on `nodes/proxy` and a ClusterRoleBinding to +the Alloy SA created by the subchart.
The block is gated by a new value +`alloy.cadvisor.enabled` (default `true`) so users without the RBAC +appetite can disable it. + +### 2. Adaptation: post-sync transform script + +`scripts/transform_dashboard.py` rewrites the dashboards from raw upstream +form to K8s-adapted form. Rules: + +```python +REPLACEMENTS = [ + ("container_label_com_docker_compose_service", "container"), + ("container_label_com_docker_compose_project", "namespace"), + ('id!=""', 'container!="", image!=""'), +] +``` + +Dashboard template-variable queries that key off Docker compose are +rewritten to use K8s labels (`namespace`, `container`). + +`scripts/sync-dashboards.sh` is extended to call the transform on every +fetched JSON before writing it to +`charts/infrahub-observability/dashboards/`. Upstream is the only source +of truth; we never edit the vendored JSONs by hand. The transform is +idempotent — re-running on already-transformed JSON is a no-op. + +### 3. Validation: static query allowlist + +`scripts/validate_dashboards.py` parses every dashboard JSON, extracts all +`expr` fields, and verifies: + +- Every metric name appears in `scripts/known-metrics.yaml` (curated by + source: infrahub_app, cadvisor, node_exporter, prometheus_internal, + loki). +- Every label name in a selector appears in `scripts/known-labels.yaml` + (either metric-native or relabeled onto the series by Alloy). + +Exit non-zero on any violation. Runs in CI right after `helm lint`. +Forces an explicit acknowledgement (allowlist edit) when collection +changes. + +Trade-offs accepted: +- Doesn't verify query *semantics* (right grouping, right rate window). +- Allowlist must be kept in sync with the Alloy scrape config — but this + is the point: a missed metric in the allowlist is a missing scrape. + +### 4. 
Wiring + +``` +charts/infrahub-observability/templates/ + alloy-config.yaml MODIFY cadvisor scrape + node discovery + alloy-cadvisor-rbac.yaml NEW ClusterRole/Binding for Alloy SA + +scripts/ + sync-dashboards.sh MODIFY post-fetch transform step + transform_dashboard.py NEW k8s adaptation + validate_dashboards.py NEW static query validation + known-metrics.yaml NEW allowlist by source + known-labels.yaml NEW allowlist + +charts/infrahub-observability/dashboards/ + container_resources.json REGEN re-run sync to produce k8s form + neo4j_monitoring.json REGEN same + +.github/workflows/ci.yml MODIFY add validate_dashboards.py step +docs/local-testing-observability.md MODIFY note container_* present, mention toggle +``` + +## Commit sequence + +1. `feat(observability): scrape kubelet cAdvisor for container metrics` + — alloy-config + RBAC. Metrics start flowing; dashboards still + filter-broken. +2. `feat(observability): add dashboard transform pipeline for k8s + label adaptation` — transform script + sync integration. No JSON + regen yet; just the tooling. +3. `chore(observability): re-sync dashboards through k8s transform` — + regenerates the two affected dashboards. This is where the UI + becomes correct. +4. `ci: static-validate dashboard queries against known-metrics + allowlist` — validator + CI step. Allowlist files are committed + last so they reflect the final state. + +Each step independently passes `helm lint`, so bisecting works. + +## Risks + +- **RBAC expansion.** `nodes/proxy` is cluster-scope `get`. Standard + monitoring permission (kube-prometheus-stack et al. request the same) + but worth flagging in the chart's `Service discovery and toggles` + section. +- **Transform regex fragility.** If upstream switches phrasing + (`container_label_com_docker_compose_service!=""` → + `container_label_com_docker_compose_service=~".+"`), the rewrite may + miss. The validator catches the residual unknown-label in CI rather + than silently shipping broken dashboards. 
+- **Allowlist drift.** Adding a new metric source (e.g., kube-state- + metrics later) requires updating `known-metrics.yaml`. This is + intentional friction. + +## Out-of-scope follow-ups + +- kube-state-metrics integration (pod/replicaset metadata). +- Live-cluster integration test in CI (deferred; static validation + catches the immediate concern). +- Custom dashboards for the chart's own components beyond what upstream + ships. diff --git a/scripts/known-metrics.yaml b/scripts/known-metrics.yaml new file mode 100644 index 0000000..5875a80 --- /dev/null +++ b/scripts/known-metrics.yaml @@ -0,0 +1,49 @@ +# Metric inventory for scripts/validate_dashboards.py +# +# The validator uses prefix-matching to verify that every metric referenced +# in a dashboard plausibly comes from one of our collected sources. This +# isn't strict membership testing — adding a new dashboard that references +# `loki_some_new_metric_total` doesn't require updating this file because +# the `loki_*` prefix is already accepted. +# +# Update this file when: +# 1. The chart starts (or stops) scraping a new metrics source. +# 2. A new third-party subchart appears with its own metric prefix. + +# Metric-name prefixes accepted as "we collect this". +# Each entry is a glob ("*" wildcard) matched against the full metric name. 
+collected_prefixes: + # === Our own scrapes (Alloy → Prometheus remote_write) === + - infrahub_* # infrahub-server :8000/metrics + - prefect_* # Prefect exporter (this chart) + Prefect server :4200/api/metrics + - container_* # kubelet /metrics/cadvisor (Section 1) + - machine_* # kubelet /metrics/cadvisor (machine-info) + - node_* # prometheus-node-exporter + - rabbitmq_* # rabbit prom exporter on :9419 + - erlang_* # rabbit (Erlang VM stats) + # === Subchart internals (collected via Alloy scraping each component's metrics endpoint) === + - loki_* # Loki itself + - tempo_* # Tempo itself + - logql_* # Loki query engine + - cortex_* # Loki/Tempo bits inherited from Cortex + - alloy_* # Alloy self-monitoring + - prometheus_* # Prometheus itself + - querier_* # Loki querier internals (unprefixed) + - ring_* # Loki ring internals (unprefixed) + - deprecated_flags_* # Loki deprecation counters + # === Standard process/runtime metrics emitted by most Go/Python services === + - go_* + - process_* + - python_* + # === Prometheus per-scrape internals === + - up + - scrape_* + +# Forbidden tokens. Any occurrence in a dashboard `expr` (after the transform +# pipeline runs) is a hard CI failure — they signal the transform script +# missed something or upstream introduced a new pattern. +denied_tokens: + - container_label_com_docker_compose_service + - container_label_com_docker_compose_project + - container_label_com_docker_compose_oneoff + - container_label_com_docker_compose_container_number diff --git a/scripts/sync-dashboards.sh b/scripts/sync-dashboards.sh new file mode 100755 index 0000000..a3a9e48 --- /dev/null +++ b/scripts/sync-dashboards.sh @@ -0,0 +1,87 @@ +#!/usr/bin/env bash +# Sync vendored Grafana dashboards from the upstream infrahub repository. 
+# +# Usage: +# scripts/sync-dashboards.sh # use ref recorded in .dashboards-source +# scripts/sync-dashboards.sh <REF> # override ref (git tag/branch/SHA), updates .dashboards-source on success +# +# Reads the chart's .dashboards-source for repo/path/files, fetches each file +# from raw.githubusercontent.com, validates JSON, writes into +# charts/infrahub-observability/dashboards/, and (if a REF was passed) updates +# the ref field in .dashboards-source. +# +# No clone, no submodule — just curl, yq, jq, and python3. Designed to run in CI without setup. + +set -euo pipefail + +CHART_DIR="charts/infrahub-observability" +SOURCE_FILE="${CHART_DIR}/.dashboards-source" +DASHBOARDS_DIR="${CHART_DIR}/dashboards" + +if [[ ! -f "$SOURCE_FILE" ]]; then + echo "error: ${SOURCE_FILE} not found. Run from the repo root." >&2 + exit 1 +fi + +require() { + if ! command -v "$1" >/dev/null 2>&1; then + echo "error: '$1' is required but not installed" >&2 + exit 1 + fi +} +require curl +require yq +require jq +require python3 + +REPO=$(yq -r '.repo' "$SOURCE_FILE") +PATH_IN_REPO=$(yq -r '.path' "$SOURCE_FILE") +CURRENT_REF=$(yq -r '.ref' "$SOURCE_FILE") +REF="${1:-$CURRENT_REF}" + +if [[ -z "$REPO" || -z "$PATH_IN_REPO" || -z "$REF" ]]; then + echo "error: .dashboards-source missing repo/path/ref" >&2 + exit 1 +fi + +readarray -t FILES < <(yq -r '.files[]' "$SOURCE_FILE") +if [[ ${#FILES[@]} -eq 0 ]]; then + echo "error: .dashboards-source has no files listed" >&2 + exit 1 +fi + +mkdir -p "$DASHBOARDS_DIR" + +echo "Syncing ${#FILES[@]} dashboard(s) from ${REPO}@${REF}:${PATH_IN_REPO}" +TMPDIR=$(mktemp -d) +trap 'rm -rf "$TMPDIR"' EXIT + +for f in "${FILES[@]}"; do + URL="https://raw.githubusercontent.com/${REPO}/${REF}/${PATH_IN_REPO}/${f}" + TMP_FILE="${TMPDIR}/${f}" + echo " fetching ${f}" + if ! curl --fail --silent --show-error --location --output "$TMP_FILE" "$URL"; then + echo "error: failed to download ${URL}" >&2 + exit 1 + fi + if ! 
jq empty "$TMP_FILE" >/dev/null 2>&1; then + echo "error: ${f} is not valid JSON" >&2 + exit 1 + fi + # Apply K8s adaptations (rewrite docker-compose labels to K8s + # equivalents). The transform is idempotent — running it on an + # already-transformed file is a no-op. + python3 "$(dirname "$0")/transform_dashboard.py" --in-place "$TMP_FILE" + if ! jq empty "$TMP_FILE" >/dev/null 2>&1; then + echo "error: ${f} became invalid JSON after transform" >&2 + exit 1 + fi + mv "$TMP_FILE" "${DASHBOARDS_DIR}/${f}" +done + +if [[ "$REF" != "$CURRENT_REF" ]]; then + yq -i ".ref = \"${REF}\"" "$SOURCE_FILE" + echo "Updated .dashboards-source ref: ${CURRENT_REF} -> ${REF}" +fi + +echo "Done." diff --git a/scripts/transform_dashboard.py b/scripts/transform_dashboard.py new file mode 100755 index 0000000..d3e945e --- /dev/null +++ b/scripts/transform_dashboard.py @@ -0,0 +1,236 @@ +#!/usr/bin/env python3 +"""Rewrite a vendored Grafana dashboard JSON from docker-compose form to K8s form. + +The upstream `opsmill/infrahub` repository ships dashboards designed for its +docker-compose dev stack. Two patterns don't carry over to Kubernetes: + +1. Container-resource panels filter on `container_label_com_docker_compose_*` + labels, which only exist when cAdvisor scrapes Docker. In Kubernetes, + cAdvisor (via kubelet) emits the same `container_*` metrics but labels + them with K8s-native labels (`container`, `namespace`, `pod`, `node`). + +2. The label *values* differ because Bitnami / community subcharts pick + container names independent of the docker-compose service names. The + Neo4j chart's container is called `neo4j`, not `database`; the redis + chart's container is `redis`, not `cache`; and so on. + +This script rewrites both the label names and the docker-compose-specific +filter values to their K8s equivalents. 
It runs on stdin → stdout (or in +place with --in-place) and is idempotent: re-running on an already +transformed file is a no-op because the new patterns no longer match the +old ones. + +The transform is intentionally conservative — it only touches PromQL +`expr` fields and template-variable queries. Panel titles, descriptions, +and unrelated text are left alone. + +Usage: + python3 scripts/transform_dashboard.py < raw.json > k8s.json + python3 scripts/transform_dashboard.py --in-place path/to/dashboard.json +""" + +from __future__ import annotations + +import argparse +import json +import re +import sys +from pathlib import Path +from typing import Any + +# Label-name rewrites: tokens that appear bare in PromQL. +LABEL_REWRITES: list[tuple[str, str]] = [ + ("container_label_com_docker_compose_service", "container"), + ("container_label_com_docker_compose_project", "namespace"), +] + +# Filter-value rewrites: values that may appear inside a `container=~"..."` +# selector (or as exact-match equality). Docker-compose service name on the +# left, Kubernetes container name on the right. +# +# These are applied only AFTER label-name rewrites, and only inside string +# tokens that look like label-value selectors, so we don't accidentally +# rewrite panel titles or unrelated text. +VALUE_REWRITES: dict[str, str] = { + "database": "neo4j", + "cache": "redis", + "message-queue": "rabbitmq", + "task-manager-db": "postgresql", + # infrahub-server / infrahub-task-worker / prefect-server are identical + # in Docker and K8s, so they don't need an entry. +} + +# The cAdvisor `id` label was used in docker-compose dashboards to filter +# out the cgroup root. In K8s cAdvisor, the equivalent guard is to require +# both `container` and `image` labels (which excludes pod-level and pause +# containers). 
+ID_FILTER_REWRITES: list[tuple[str, str]] = [ + ('id!=""', 'container!="", image!=""'), + # Same pattern with single quotes (less common): + ("id!=''", 'container!="", image!=""'), +] + + +def _rewrite_label_names(text: str) -> str: + """Replace docker-compose label tokens with their K8s equivalents. + + Uses a word-boundary regex so partial matches in unrelated text are safe. + """ + for src, dst in LABEL_REWRITES: + text = re.sub(rf"\b{re.escape(src)}\b", dst, text) + return text + + +def _rewrite_id_filters(text: str) -> str: + for src, dst in ID_FILTER_REWRITES: + text = text.replace(src, dst) + return text + + +# Matches a `container` (or post-rewrite equivalent) selector value, e.g. +# container="database" +# container=~"database|infrahub-server" +# container!~"foo" +# Named groups: `lhs` is the label plus operator, `val` is the value inside the quotes. +_SELECTOR_RE = re.compile(r'(?P<lhs>\bcontainer\s*[!=]~?\s*)"(?P<val>[^"]*)"') + + +def _rewrite_selector_values(text: str) -> str: + """Apply VALUE_REWRITES inside container=...".." selectors. + + Only rewrites exact-match values and clean regex-alternation branches: + + container="database" → container="neo4j" + container=~"database|infrahub-server" → container=~"neo4j|infrahub-server" + + Does NOT rewrite values nested inside larger regex patterns like + `.*(database|cache|queue).*` — those are fuzzy matches whose K8s + equivalents depend on which substrings each K8s container name + contains, and a literal substitution would produce wrong results + (e.g., `cache-master` would incorrectly get rewritten). + + Dashboards using such fuzzy patterns (currently only LogQL queries in + infrahub_monitoring.json) will work only partially in K8s — the branches + that happen to match a K8s container name as a substring keep working; + the others don't. The static dashboard validator catches this as + unknown-label-value warnings. 
+ """ + + def replace(match: re.Match[str]) -> str: + lhs = match.group("lhs") + val = match.group("val") + # Skip values that contain regex metacharacters beyond a simple + # alternation — we can't reliably rewrite fuzzy patterns like + # ".*(database|cache).*" because doing so would also touch + # substrings that should remain (e.g., `cache-master`). + if re.search(r"[.*?+\[\](){}^$\\]", val): + return match.group(0) + branches = val.split("|") + rewritten = [VALUE_REWRITES.get(b, b) for b in branches] + return f'{lhs}"{"|".join(rewritten)}"' + + return _SELECTOR_RE.sub(replace, text) + + +def transform_expr(expr: str) -> str: + """Apply the full PromQL transform pipeline to a single expression.""" + expr = _rewrite_id_filters(expr) + expr = _rewrite_label_names(expr) + expr = _rewrite_selector_values(expr) + return expr + + +def _is_promql_string(s: str) -> bool: + """Heuristic: does this string look like a PromQL/LogQL expression? + + Avoids touching panel titles and prose. Triggers on any of: + - presence of `{` (label selectors) + - PromQL aggregation keywords (rate, sum, avg, count, by) + - LogQL stream selectors (component=...) + - References to known metric prefixes (container_, node_, infrahub_, etc.) + """ + return bool( + re.search( + r"\{|" + r"\b(rate|sum|avg|count|max|min|histogram_quantile|by|without)\s*\(|" + r"\b(container|node|infrahub|loki|prefect|rabbitmq|prometheus)_", + s, + ) + ) + + +def _walk(node: Any) -> Any: + """Recursively transform expr fields and PromQL-shaped template queries. + + Also rewrites Grafana legend templates (`legendFormat`) which reference + label names via `{{label_name}}` interpolation — when we rename the + label in the query, we have to rename it in the legend too or the + legend renders empty. 
+ """ + if isinstance(node, dict): + for key, val in node.items(): + if key == "expr" and isinstance(val, str): + node[key] = transform_expr(val) + elif key == "legendFormat" and isinstance(val, str): + # Legend uses {{label}} syntax; reuse the label-name rewrite. + node[key] = _rewrite_label_names(val) + elif key == "query" and isinstance(val, str) and _is_promql_string(val): + node[key] = transform_expr(val) + elif key == "query" and isinstance(val, dict) and isinstance( + val.get("query"), str + ) and _is_promql_string(val["query"]): + val["query"] = transform_expr(val["query"]) + else: + _walk(val) + elif isinstance(node, list): + for item in node: + _walk(item) + return node + + +def transform_dashboard(raw: str) -> str: + """Transform a dashboard JSON document. Idempotent. + + Returns the input unchanged (byte-for-byte) when the transform makes no + structural changes. This is important because Grafana exports use a + non-standard indentation (4 spaces at depth 1, +2 per level after) that + no standard JSON serializer produces. Reformatting every dashboard on + every sync would mask real changes in PR diffs with noise. + """ + data = json.loads(raw) + original = json.dumps(data, sort_keys=True) + _walk(data) + transformed = json.dumps(data, sort_keys=True) + if original == transformed: + return raw + return json.dumps(data, indent=2) + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__.split("\n\n", 1)[0]) + parser.add_argument( + "--in-place", + metavar="FILE", + type=Path, + help="rewrite FILE in place; otherwise reads stdin and writes stdout", + ) + args = parser.parse_args() + + if args.in_place: + raw = args.in_place.read_text() + transformed = transform_dashboard(raw) + # Preserve the input's trailing-newline disposition. POSIX prefers + # a trailing newline, but upstream is inconsistent across files + # and matching the input keeps diffs minimal. 
+ if raw.endswith("\n") and not transformed.endswith("\n"): + transformed += "\n" + args.in_place.write_text(transformed) + return 0 + + raw = sys.stdin.read() + sys.stdout.write(transform_dashboard(raw)) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/validate_dashboards.py b/scripts/validate_dashboards.py new file mode 100755 index 0000000..b03b69d --- /dev/null +++ b/scripts/validate_dashboards.py @@ -0,0 +1,233 @@ +#!/usr/bin/env python3 +"""Validate vendored Grafana dashboards against our chart's metric inventory. + +Two checks, both run against every dashboard JSON under +charts/infrahub-observability/dashboards/: + +1. HARD FAIL — denied tokens. Any occurrence of a token listed under + `denied_tokens` in scripts/known-metrics.yaml inside a query expr means + either the transform pipeline failed or upstream introduced a new + docker-compose-specific pattern we don't yet handle. Exits non-zero. + +2. SOFT WARN — unknown metrics. Every metric name referenced is matched + against the `collected_prefixes` glob list. Names that match none of + them are surfaced as warnings (informational; doesn't fail CI). These + are usually one of: a typo, a metric we should consider scraping, or + a third-party prefix we haven't added to the allowlist yet. + +Designed to run fast (<1 s on the full dashboard set) without a cluster. +""" + +from __future__ import annotations + +import argparse +import fnmatch +import json +import re +import sys +from pathlib import Path +from typing import Iterable + +try: + import yaml +except ImportError: + print( + "error: pyyaml is required. Install with `pip install pyyaml`.", + file=sys.stderr, + ) + sys.exit(2) + + +# Keywords that look like metric names in PromQL but aren't. Filtered out +# before checking against the allowlist so we don't have to list every +# function in the inventory. 
+PROMQL_KEYWORDS = frozenset( + { + "abs", "absent", "absent_over_time", "and", "atan", "atan2", + "avg", "avg_over_time", "bool", "bottomk", "by", "ceil", + "changes", "clamp", "clamp_max", "clamp_min", "cos", "cosh", + "count", "count_over_time", "count_values", "day_of_month", + "day_of_week", "day_of_year", "days_in_month", "delta", "deriv", + "exp", "floor", "group", "group_left", "group_right", + "histogram_avg", "histogram_count", "histogram_fraction", + "histogram_quantile", "histogram_stddev", "histogram_stdvar", + "histogram_sum", "holt_winters", "hour", "idelta", "if", + "ignoring", "increase", "irate", "label_format", "label_join", + "label_replace", "last_over_time", "ln", "log10", "log2", "max", + "max_over_time", "min", "min_over_time", "minute", "month", + "offset", "on", "or", "predict_linear", "present_over_time", + "quantile", "quantile_over_time", "rate", "resets", "round", + "scalar", "sgn", "sin", "sinh", "sort", "sort_desc", "sqrt", + "stddev", "stddev_over_time", "stdvar", "stdvar_over_time", + "sum", "sum_over_time", "tan", "tanh", "time", "timestamp", + "topk", "unless", "vector", "without", "year", + # LogQL specifics + "line_format", "json", "logfmt", "regexp", "pattern", "unwrap", + "rate_counter", "bytes_rate", "bytes_over_time", "first_over_time", + "last_over_time", "label_format", + # Grafana template-variable functions + "label_values", "query_result", + # Selector operator words that may slip through the regex + "level", "instance", "job", + } +) + +# Identifier-like tokens that are PromQL/LogQL operators or punctuation +# residue, not metric names. The regex below is permissive so we filter +# manually here as well. 
+NON_METRIC_TOKENS = frozenset( + {"i", "le", "id", "kube", "ingester", "deployment", "infrahub", + "cache", "container", "component", "namespace", "pod", "image", + "level", "method", "path", "branch", "env", "app_name", + "flow_name", "kv_name", "lock", "is_schedule_active", "group_left", + "ignoring"} +) + + +def _extract_metrics_and_labels(expr: str) -> tuple[set[str], set[str]]: + """Return (metric_names, label_names) referenced in a PromQL/LogQL expr. + + Tight heuristic: a metric name is an identifier immediately before `{` + (selector) or `[` (range); bare names with neither (`up`) are not matched. + Labels are identifiers followed by `=` / `!=` / `=~` / `!~` and a quote inside + `{...}` selectors. This intentionally misses metrics referenced without a + selector or range, like `sum(foo_total)`, where a real + parser would be needed to tell `foo_total` from a function, but in practice + every metric name appears with `{` or `[` adjacent at least once in + each dashboard, so we don't miss true positives at the dashboard level. + """ + metrics: set[str] = set() + labels: set[str] = set() + + # Metric names: identifier immediately followed by `{` or `[` (optional + # whitespace allowed); keywords and known non-metric tokens are skipped below. + for match in re.finditer( + r"\b([a-zA-Z_][a-zA-Z0-9_:]*)\s*[{\[]", expr + ): + name = match.group(1) + if name.startswith("__"): # Prometheus internals like __name__ + continue + if len(name) <= 1: # single-char variable like LogQL's `t` + continue + if name in PROMQL_KEYWORDS or name in NON_METRIC_TOKENS: + continue + metrics.add(name) + + # Labels: identifier followed by =/!=/=~/!~ inside {...}. 
+ for sel_match in re.finditer(r"\{([^}]*)\}", expr): + for label_match in re.finditer( + r"\b([a-zA-Z_][a-zA-Z0-9_]*)\s*[!=]~?\s*\"", sel_match.group(1) + ): + labels.add(label_match.group(1)) + + return metrics, labels + + +def _walk_exprs(node: object) -> Iterable[str]: + if isinstance(node, dict): + for key, val in node.items(): + if key == "expr" and isinstance(val, str): + yield val + yield from _walk_exprs(val) + elif isinstance(node, list): + for item in node: + yield from _walk_exprs(item) + + +def _metric_is_collected(name: str, prefixes: list[str]) -> bool: + return any(fnmatch.fnmatchcase(name, p) for p in prefixes) + + +def validate( + dashboard_dir: Path, inventory_file: Path +) -> tuple[int, int]: + """Validate every dashboard. Returns (hard_failures, soft_warnings).""" + inventory = yaml.safe_load(inventory_file.read_text()) + collected_prefixes = list(inventory.get("collected_prefixes", [])) + denied_tokens = list(inventory.get("denied_tokens", [])) + + dashboards = sorted(dashboard_dir.glob("*.json")) + if not dashboards: + print(f"error: no dashboards found in {dashboard_dir}", file=sys.stderr) + return 1, 0 + + hard = 0 + soft = 0 + + for path in dashboards: + data = json.loads(path.read_text()) + exprs = list(_walk_exprs(data)) + + all_metrics: set[str] = set() + all_labels: set[str] = set() + for expr in exprs: + # Hard-fail: denied tokens + for tok in denied_tokens: + if tok in expr: + hard += 1 + print( + f"FAIL {path.name}: denied token `{tok}` " + f"found in expr: {expr[:120]}", + file=sys.stderr, + ) + break + m, l = _extract_metrics_and_labels(expr) + all_metrics |= m + all_labels |= l + + unknown = sorted( + m + for m in all_metrics + if not _metric_is_collected(m, collected_prefixes) + ) + if unknown: + soft += len(unknown) + print( + f"WARN {path.name}: {len(unknown)} metric name(s) not " + f"matched by any collected_prefix: {', '.join(unknown[:10])}" + + (f" (+ {len(unknown) - 10} more)" if len(unknown) > 10 else "") + ) + + print( + 
f" {path.name}: {len(exprs)} exprs, " + f"{len(all_metrics)} metrics, {len(all_labels)} labels" + ) + + print( + f"\nSummary: {hard} hard failure(s), {soft} soft warning(s) " + f"across {len(dashboards)} dashboard(s)" + ) + return hard, soft + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__.split("\n\n", 1)[0]) + parser.add_argument( + "--dashboards", + type=Path, + default=Path("charts/infrahub-observability/dashboards"), + help="dashboard directory (default: charts/infrahub-observability/dashboards)", + ) + parser.add_argument( + "--inventory", + type=Path, + default=Path("scripts/known-metrics.yaml"), + help="metric inventory YAML (default: scripts/known-metrics.yaml)", + ) + parser.add_argument( + "--strict", + action="store_true", + help="treat soft warnings as failures too", + ) + args = parser.parse_args() + + hard, soft = validate(args.dashboards, args.inventory) + if hard > 0: + return 1 + if args.strict and soft > 0: + return 1 + return 0 + + +if __name__ == "__main__": + sys.exit(main())