From 6bb47c8c6f3c9a312ee32ec6f96d9402d378297a Mon Sep 17 00:00:00 2001 From: Planck Li Date: Thu, 28 May 2026 18:43:39 +0800 Subject: [PATCH 1/4] [Celerdata] Add slow_lock_held_time_ms and slow_lock_wait_time_ms metrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit StarRocks FE introduced two summary metrics for slow-lock observability in https://github.com/StarRocks/starrocks/pull/66027 (back-ported to 4.0/3.5): - starrocks_fe_slow_lock_held_time_ms — how long locks were held when slow locks were detected (max across owners). - starrocks_fe_slow_lock_wait_time_ms — how long waiters waited before the lock was acquired. Both are emitted as Prometheus summary (quantiles 0.75/0.95/0.98/0.99/0.999 plus _sum / _count), so map them with the same three-line pattern already used by other histogram metrics in this integration. Add corresponding metadata.csv entries. Bump version to 1.3.0 and update CHANGELOG + README. Signed-off-by: Planck Li --- celerdata/CHANGELOG.md | 6 ++++++ celerdata/README.md | 2 +- celerdata/datadog_checks/celerdata/__about__.py | 2 +- celerdata/datadog_checks/celerdata/metrics.py | 6 ++++++ celerdata/metadata.csv | 6 ++++++ 5 files changed, 20 insertions(+), 2 deletions(-) diff --git a/celerdata/CHANGELOG.md b/celerdata/CHANGELOG.md index 77ff80d412..9a00cca619 100644 --- a/celerdata/CHANGELOG.md +++ b/celerdata/CHANGELOG.md @@ -1,5 +1,11 @@ # CHANGELOG - celerdata +## 1.3.0 / 2026-05-28 + +***Added***: + +* Add the `celerdata.fe.slow_lock_held_time_ms` and `celerdata.fe.slow_lock_wait_time_ms` summary metrics, surfacing FE slow-lock held time and wait time (introduced by StarRocks/starrocks#66027) + ## 1.2.1 / 2025-10-01 ***Fixed***: diff --git a/celerdata/README.md b/celerdata/README.md index 27a9dc97e3..544432abe3 100644 --- a/celerdata/README.md +++ b/celerdata/README.md @@ -18,7 +18,7 @@ To collect StarRocks [metrics][9] and logs: 2. 
Install the CelerData check on your host with the following command: ```shell - datadog-agent integration install -t datadog-celerdata==1.2.1 + datadog-agent integration install -t datadog-celerdata==1.3.0 ``` ### Configuration diff --git a/celerdata/datadog_checks/celerdata/__about__.py b/celerdata/datadog_checks/celerdata/__about__.py index a955fdae12..67bc602abf 100644 --- a/celerdata/datadog_checks/celerdata/__about__.py +++ b/celerdata/datadog_checks/celerdata/__about__.py @@ -1 +1 @@ -__version__ = "1.2.1" +__version__ = "1.3.0" diff --git a/celerdata/datadog_checks/celerdata/metrics.py b/celerdata/datadog_checks/celerdata/metrics.py index 40eb4bf512..b07232730c 100644 --- a/celerdata/datadog_checks/celerdata/metrics.py +++ b/celerdata/datadog_checks/celerdata/metrics.py @@ -63,6 +63,12 @@ "starrocks_fe_rps": "fe.rps", "starrocks_fe_safe_mode": "fe.safe_mode", "starrocks_fe_scheduled_tablet_num": "fe.scheduled_tablet_num", + "starrocks_fe_slow_lock_held_time_ms": "fe.slow_lock_held_time_ms", + "starrocks_fe_slow_lock_held_time_ms_count": "fe.slow_lock_held_time_ms.count", + "starrocks_fe_slow_lock_held_time_ms_sum": "fe.slow_lock_held_time_ms.sum", + "starrocks_fe_slow_lock_wait_time_ms": "fe.slow_lock_wait_time_ms", + "starrocks_fe_slow_lock_wait_time_ms_count": "fe.slow_lock_wait_time_ms.count", + "starrocks_fe_slow_lock_wait_time_ms_sum": "fe.slow_lock_wait_time_ms.sum", "starrocks_fe_slow_query": "fe.slow_query", "starrocks_fe_snmp": "fe.snmp", "starrocks_fe_table_num": "fe.table_num", diff --git a/celerdata/metadata.csv b/celerdata/metadata.csv index 87cd446d36..eede3e716d 100644 --- a/celerdata/metadata.csv +++ b/celerdata/metadata.csv @@ -63,6 +63,12 @@ celerdata.fe.shortcircuit_latency_ms.quantile,gauge,,,,,0,celerdata,, celerdata.fe.shortcircuit_latency_ms.sum,count,,,,,0,celerdata,, celerdata.fe.shortcircuit_query.count,count,,,,Total number of shortcircuit queries,0,celerdata,, celerdata.fe.shortcircuit_rpc.count,count,,,,Total number of 
shortcircuit RPCs,0,celerdata,, +celerdata.fe.slow_lock_held_time_ms.count,count,,,,Number of slow-lock samples for which lock held time was recorded,0,celerdata,, +celerdata.fe.slow_lock_held_time_ms.quantile,gauge,,,,Quantiles (ms) of lock held time when slow locks are detected,0,celerdata,, +celerdata.fe.slow_lock_held_time_ms.sum,count,,,,Sum (ms) of lock held time across slow lock samples,0,celerdata,, +celerdata.fe.slow_lock_wait_time_ms.count,count,,,,Number of slow-lock samples for which lock wait time was recorded,0,celerdata,, +celerdata.fe.slow_lock_wait_time_ms.quantile,gauge,,,,Quantiles (ms) of lock wait time when slow locks are detected,0,celerdata,, +celerdata.fe.slow_lock_wait_time_ms.sum,count,,,,Sum (ms) of lock wait time across slow lock samples,0,celerdata,, celerdata.fe.slow_query.count,count,,,,Total number of slow queries,0,celerdata,, celerdata.fe.snmp,gauge,,,,All TCP packets that were retransmitted,0,celerdata,, celerdata.fe.starmgr_journal_replay_ops.count,count,,,,Number of journals replayed,0,celerdata,, From 4b456f1d08c00231e33356c55f9b4bd6169ee316 Mon Sep 17 00:00:00 2001 From: Planck Li Date: Thu, 28 May 2026 18:43:39 +0800 Subject: [PATCH 2/4] [Celerdata] Add slow_lock_held_time_ms and slow_lock_wait_time_ms metrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit StarRocks FE introduced two summary metrics for slow-lock observability in https://github.com/StarRocks/starrocks/pull/66027 (back-ported to 4.0/3.5): - starrocks_fe_slow_lock_held_time_ms — how long locks were held when slow locks were detected (max across owners). - starrocks_fe_slow_lock_wait_time_ms — how long waiters waited before the lock was acquired. Both are emitted as Prometheus summary (quantiles 0.75/0.95/0.98/0.99/0.999 plus _sum / _count), so map them with the same three-line pattern already used by other histogram metrics in this integration. Add corresponding metadata.csv entries.
Bump version to 1.3.0 and update CHANGELOG + README. Also regenerate config_models/{defaults,instance}.py via `ddev validate models celerdata -s` (ddev 16.1.1) — the generated files had drifted from their spec.yaml source since the previous sync in 1.2.0, which CI flags as out-of-sync. These changes are auto-generated and not behavioral. Signed-off-by: Planck Li --- .../celerdata/config_models/defaults.py | 4 ++++ .../celerdata/config_models/instance.py | 14 +++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/celerdata/datadog_checks/celerdata/config_models/defaults.py b/celerdata/datadog_checks/celerdata/config_models/defaults.py index 0013907872..a14932fbfb 100644 --- a/celerdata/datadog_checks/celerdata/config_models/defaults.py +++ b/celerdata/datadog_checks/celerdata/config_models/defaults.py @@ -40,6 +40,10 @@ def instance_enable_health_service_check(): return True +def instance_enable_legacy_tags_normalization(): + return True + + def instance_histogram_buckets_as_distributions(): return False diff --git a/celerdata/datadog_checks/celerdata/config_models/instance.py b/celerdata/datadog_checks/celerdata/config_models/instance.py index 07646f388b..465c3c5996 100644 --- a/celerdata/datadog_checks/celerdata/config_models/instance.py +++ b/celerdata/datadog_checks/celerdata/config_models/instance.py @@ -9,6 +9,7 @@ from typing import Any, Optional, Union from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator +from typing_extensions import Literal from datadog_checks.base.utils.functions import identity from datadog_checks.base.utils.models import validation @@ -16,6 +17,11 @@ from . 
import defaults, validators +SECURE_FIELD_NAMES = frozenset( + ['auth_token', 'kerberos_cache', 'kerberos_keytab', 'tls_ca_cert', 'tls_cert', 'tls_private_key'] +) + + class AuthToken(BaseModel): model_config = ConfigDict( arbitrary_types_allowed=True, @@ -93,6 +99,7 @@ class InstanceConfig(BaseModel): disable_generic_tags: Optional[bool] = None empty_default_hostname: Optional[bool] = None enable_health_service_check: Optional[bool] = None + enable_legacy_tags_normalization: Optional[bool] = None exclude_labels: Optional[tuple[str, ...]] = None exclude_metrics: Optional[tuple[str, ...]] = None exclude_metrics_by_labels: Optional[MappingProxyType[str, Union[bool, tuple[str, ...]]]] = None @@ -105,7 +112,7 @@ class InstanceConfig(BaseModel): ignore_connection_errors: Optional[bool] = None ignore_tags: Optional[tuple[str, ...]] = None include_labels: Optional[tuple[str, ...]] = None - kerberos_auth: Optional[str] = None + kerberos_auth: Optional[Literal['required', 'optional', 'disabled']] = None kerberos_cache: Optional[str] = None kerberos_delegate: Optional[bool] = None kerberos_force_initiate: Optional[bool] = None @@ -158,6 +165,11 @@ def _validate(cls, value, info): field_name = field.alias or info.field_name if field_name in info.context['configured_fields']: value = getattr(validators, f'instance_{info.field_name}', identity)(value, field=field) + + if info.field_name in SECURE_FIELD_NAMES: + validation.security.check_field_trusted_provider( + info.field_name, value, info.context.get('security_config') + ) else: value = getattr(defaults, f'instance_{info.field_name}', lambda: value)() From 0a5b56b7d44c2027f4ff564dec37e223674b5e26 Mon Sep 17 00:00:00 2001 From: Planck Li Date: Tue, 2 Jun 2026 12:04:59 +0800 Subject: [PATCH 3/4] [Celerdata] Sync conf.yaml.example with config spec template Regenerate conf.yaml.example via 'ddev validate config celerdata -s' so it matches the updated shared OpenMetrics config template. 
Fixes the 'validations' CI job which failed with 'conf.yaml.example is not in sync'. Signed-off-by: Planck Li --- celerdata/datadog_checks/celerdata/data/conf.yaml.example | 1 + 1 file changed, 1 insertion(+) diff --git a/celerdata/datadog_checks/celerdata/data/conf.yaml.example b/celerdata/datadog_checks/celerdata/data/conf.yaml.example index 829f07ce76..54ba994470 100644 --- a/celerdata/datadog_checks/celerdata/data/conf.yaml.example +++ b/celerdata/datadog_checks/celerdata/data/conf.yaml.example @@ -86,6 +86,7 @@ instances: ## @param exclude_metrics - list of strings - optional ## A list of metrics to exclude, with each entry being either ## the exact metric name or a regular expression. + ## ## In order to exclude all metrics but the ones matching a specific filter, ## you can use a negative lookahead regex like: ## - ^(?!foo).*$ From 973ce6f5b7717524e44e791156dc1bd4d9cf1192 Mon Sep 17 00:00:00 2001 From: Planck Li Date: Mon, 15 Jun 2026 16:55:09 +0800 Subject: [PATCH 4/4] [Celerdata] Bump datadog-checks-base minimum to 37.21.0 The previous floor (36.16.0) pulls ddtrace 2.10.6, whose sdist build requires pkg_resources and fails on Python 3.13 + modern setuptools, breaking the `test-minimum-base-package` CI job. Base 37.21.0 pulls ddtrace 3.12.5 which ships cp313 prebuilt wheels, so the source build step (and the pkg_resources lookup) is skipped entirely. Verified locally: `ddev test --compat celerdata` passes 2/2 in 7m02s on Python 3.13 with ddtrace 3.12.5 installed. Signed-off-by: Planck Li --- celerdata/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/celerdata/pyproject.toml b/celerdata/pyproject.toml index 13346cdf31..71ac4b35eb 100644 --- a/celerdata/pyproject.toml +++ b/celerdata/pyproject.toml @@ -29,7 +29,7 @@ classifiers = [ "Topic :: System :: Monitoring", ] dependencies = [ - "datadog-checks-base>=36.16.0", + "datadog-checks-base>=37.21.0", ] dynamic = [ "version",