Skip to content
Open
5 changes: 5 additions & 0 deletions changelog.d/tag_cardinality_limit_exclude_tags.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
The `tag_cardinality_limit` transform now accepts an `exclude_tags` option (settable globally
and per-metric) that lets specific tag keys bypass cardinality limiting entirely — useful for
tags whose high cardinality is intentional.

authors: kaarolch
23 changes: 22 additions & 1 deletion src/transforms/tag_cardinality_limit/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ pub struct Config {

/// Configuration for the `tag_cardinality_limit` transform for a specific group of metrics.
#[configurable_component]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Inner {
/// How many distinct values to accept for any given key.
#[serde(default = "default_value_limit")]
Expand All @@ -62,6 +62,26 @@ pub struct Inner {
#[configurable(derived)]
#[serde(default)]
pub internal_metrics: InternalMetricsConfig,

/// Tag keys that bypass cardinality limiting entirely.
///
/// Listed tag keys are passed through unchanged on every event, are not counted against
/// `value_limit`, and never enter the cache. Useful for tags whose high cardinality is
/// intentional, such as `kube_pod_name` or `tenant_id`.
///
/// When set on a per-metric configuration, the effective exclusion list is the union of the
/// global `exclude_tags` and the per-metric `exclude_tags`.
///
/// Excluded tags do not contribute to `tag_value_limit_exceeded_total` and do not produce
/// `TagCardinalityLimitRejectingTag` or `TagCardinalityLimitRejectingEvent` internal events,
/// so they are invisible to cardinality-pressure dashboards by design.
#[serde(default)]
#[configurable(metadata(
docs::human_name = "Excluded Tag Keys",
docs::examples = "kube_pod_name",
docs::examples = "tenant_id",
))]
pub exclude_tags: Vec<String>,
}

/// Controls the approach taken for tracking tag cardinality.
Expand Down Expand Up @@ -149,6 +169,7 @@ impl GenerateConfig for Config {
value_limit: default_value_limit(),
limit_exceeded_action: default_limit_exceeded_action(),
internal_metrics: InternalMetricsConfig::default(),
exclude_tags: Vec::new(),
},
per_metric_limits: HashMap::default(),
})
Expand Down
61 changes: 55 additions & 6 deletions src/transforms/tag_cardinality_limit/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,29 @@ impl TagCardinalityLimit {
}
}

/// Builds the list of tag keys that are exempt from cardinality limiting for a metric.
///
/// The global `exclude_tags` are listed first, followed by the `exclude_tags` of the
/// per-metric configuration that matches `metric_key`, if there is one. A per-metric
/// configuration matches when its name equals the metric name and its namespace is either
/// unset or equal to the metric namespace. When `metric_key` is `None`, only the global
/// list is used.
///
/// Every key appears at most once, in first-seen order, so a key present in both lists is
/// only compared once per tag by the caller.
fn effective_exclude_tags(&self, metric_key: Option<&MetricId>) -> Vec<&str> {
    // Resolve the matching per-metric configuration (if any) up front.
    let matched = metric_key.and_then(|id| {
        self.config
            .per_metric_limits
            .iter()
            .find(|(name, limits)| {
                **name == id.1 && (limits.namespace.is_none() || limits.namespace == id.0)
            })
            .map(|(_, limits)| limits)
    });

    // Global keys first, then the per-metric ones; `matched` contributes nothing when `None`.
    let candidates = self.config.global.exclude_tags.iter().chain(
        matched
            .into_iter()
            .flat_map(|limits| limits.config.exclude_tags.iter()),
    );

    let mut keys: Vec<&str> = Vec::new();
    for candidate in candidates {
        let candidate = candidate.as_str();
        // Keep only the first occurrence of each key.
        if keys.iter().all(|seen| *seen != candidate) {
            keys.push(candidate);
        }
    }
    keys
}

/// Takes in a key and a value corresponding to a tag on an incoming Metric
/// Event. If that value is already part of the set of accepted values for that
/// key, then simply returns true. If that value is not yet part of the
Expand All @@ -65,23 +88,28 @@ impl TagCardinalityLimit {
key: &str,
value: &TagValueSet,
) -> bool {
let config = *self.get_config_for_metric(metric_key);
// Copy out the `Copy` fields up front to release the borrow on `self.config` before we
// take `&mut self.accepted_tags` below.
let (value_limit, mode) = {
let config = self.get_config_for_metric(metric_key);
(config.value_limit, config.mode)
};
let metric_accepted_tags = self.accepted_tags.entry(metric_key.cloned()).or_default();
let tag_value_set = metric_accepted_tags
.entry_ref(key)
.or_insert_with(|| AcceptedTagValueSet::new(config.value_limit, &config.mode));
.or_insert_with(|| AcceptedTagValueSet::new(value_limit, &mode));

if tag_value_set.contains(value) {
// Tag value has already been accepted, nothing more to do.
return true;
}

// Tag value not yet part of the accepted set.
if tag_value_set.len() < config.value_limit {
if tag_value_set.len() < value_limit {
// accept the new value
tag_value_set.insert(value.clone());

if tag_value_set.len() == config.value_limit {
if tag_value_set.len() == value_limit {
emit!(TagCardinalityValueLimitReached { key });
}

Expand Down Expand Up @@ -113,11 +141,14 @@ impl TagCardinalityLimit {

/// Record a key and value corresponding to a tag on an incoming Metric.
///
/// Inserts `value` into the set of values stored for `key` under `metric_key`, creating the
/// per-metric entry and the per-key set first if they do not exist yet. Nothing in this
/// function compares the set's size against `value_limit`; the limit is only passed along
/// when a new set is constructed.
fn record_tag_value(&mut self, metric_key: Option<&MetricId>, key: &str, value: &TagValueSet) {
let config = *self.get_config_for_metric(metric_key);
// Read the two settings needed below inside a nested scope, so the `config` reference is
// gone before `self.accepted_tags` is borrowed mutably.
let (value_limit, mode) = {
let config = self.get_config_for_metric(metric_key);
(config.value_limit, config.mode)
};
// Get or create the tag map for this metric, keyed by a clone of `metric_key`.
let metric_accepted_tags = self.accepted_tags.entry(metric_key.cloned()).or_default();
// Get or create the value set for this tag key, then add the value to it.
metric_accepted_tags
.entry_ref(key)
.or_insert_with(|| AcceptedTagValueSet::new(config.value_limit, &config.mode))
.or_insert_with(|| AcceptedTagValueSet::new(value_limit, &mode))
.insert(value.clone());
}

Expand All @@ -135,6 +166,15 @@ impl TagCardinalityLimit {
None
};
if let Some(tags_map) = metric.tags_mut() {
// Materialize as owned strings so the list can outlive the `&mut self` calls in the
// loops below. Linear scan is faster than a `HashSet` for typical exclusion sizes.
let excluded_keys: Vec<String> = self
.effective_exclude_tags(metric_key.as_ref())
.into_iter()
.map(str::to_owned)
.collect();
let is_excluded = |key: &str| excluded_keys.iter().any(|k| k == key);

match self
.get_config_for_metric(metric_key.as_ref())
.limit_exceeded_action
Expand All @@ -144,6 +184,9 @@ impl TagCardinalityLimit {
// doesn't change the behavior of the check.

for (key, value) in tags_map.iter_sets() {
if is_excluded(key) {
continue;
}
if self.tag_limit_exceeded(metric_key.as_ref(), key, value) {
let config = self.get_config_for_metric(metric_key.as_ref());
emit!(TagCardinalityLimitRejectingEvent {
Expand All @@ -158,13 +201,19 @@ impl TagCardinalityLimit {
}
}
for (key, value) in tags_map.iter_sets() {
if is_excluded(key) {
continue;
}
self.record_tag_value(metric_key.as_ref(), key, value);
}
}
LimitExceededAction::DropTag => {
let config = self.get_config_for_metric(metric_key.as_ref());
let include_extended_tags = config.internal_metrics.include_extended_tags;
tags_map.retain(|key, value| {
if is_excluded(key) {
return true;
}
if self.try_accept_tag(metric_key.as_ref(), key, value) {
true
} else {
Expand Down
Loading
Loading