From 993917c3fe5b95ce442751ac748e548d641e272c Mon Sep 17 00:00:00 2001 From: Eric Weaver Date: Wed, 10 Jun 2026 09:03:45 -0400 Subject: [PATCH] [DBMON-6715] Update TagManager to handle internal and db tags (#23976) * Update TagManager to handle internal and db tags correctly * Add changelog --- datadog_checks_base/changelog.d/23976.added | 1 + .../datadog_checks/base/utils/db/utils.py | 108 +++++--- .../tests/base/utils/db/test_util.py | 252 ++++++++++++------ 3 files changed, 254 insertions(+), 107 deletions(-) create mode 100644 datadog_checks_base/changelog.d/23976.added diff --git a/datadog_checks_base/changelog.d/23976.added b/datadog_checks_base/changelog.d/23976.added new file mode 100644 index 0000000000000..9e2c3848673f5 --- /dev/null +++ b/datadog_checks_base/changelog.d/23976.added @@ -0,0 +1 @@ +Add ``include_internal`` and ``include_db`` options to ``TagManager.get_tags`` so DBM checks can exclude internal resource tags (``dd.internal.*``) and the per-database ``db`` tag from tag views that should not carry them. diff --git a/datadog_checks_base/datadog_checks/base/utils/db/utils.py b/datadog_checks_base/datadog_checks/base/utils/db/utils.py index 5ad2a61a79d2f..8bb52064b1310 100644 --- a/datadog_checks_base/datadog_checks/base/utils/db/utils.py +++ b/datadog_checks_base/datadog_checks/base/utils/db/utils.py @@ -535,20 +535,42 @@ class TagType(Enum): KEYLESS = auto() +# Tags under keys with this prefix are internal resource tags (e.g. "dd.internal.resource") that are +# consumed by the agent's metric submission pipeline. They should be excluded from payloads that don't +# go through that pipeline, such as DBM metadata events. +INTERNAL_TAG_PREFIX = "dd.internal" + +# Key used for the per-database tag. DBM checks exclude this from instance-level views because +# the data is collected across all databases and the `db` tag is re-applied during ingestion. +DB_TAG_KEY = "db" + + class TagManager: """ Manages tags for a check. Tags are stored as a dictionary of key-value pairs for key-value tags and as a list of values for keyless tags useful for easy update and deletion. - There's an internal cache of the tag list to avoid generating the list of tag strings + There's an internal cache of the rendered tag lists to avoid generating the tag strings multiple times. """ def __init__(self, normalizer: Optional[Callable[[Union[str, bytes]], str]] = None) -> None: self._tags: Dict[Union[str, TagType], List[str]] = {} - self._cached_tag_list: Optional[tuple[str, ...]] = None + # Rendered tag strings split into three disjoint buckets. Each tag string is stored exactly + # once, and a single render pass populates all three: + # - _internal_tags: keys prefixed with "dd.internal" + # - _db_tags: the per-database "db" key + # - _base_tags: everything else (including keyless tags) + # get_tags() returns _base_tags merged with the requested optional buckets. + self._base_tags: tuple[str, ...] = () + self._internal_tags: tuple[str, ...] = () + self._db_tags: tuple[str, ...] = () + self._cache_valid: bool = False self._keyless: TagType = TagType.KEYLESS self._normalizer = normalizer + def _invalidate_cache(self) -> None: + self._cache_valid = False + def set_tag(self, key: Optional[str], value: str, replace: bool = False, normalize: bool = False) -> None: """ Set a tag with the given key and value. @@ -567,13 +589,18 @@ def set_tag(self, key: Optional[str], value: str, replace: bool = False, normali key = self._keyless if replace or key not in self._tags: + # Skip the cache invalidation if the value is unchanged. Dynamic tags (e.g. version, + # replication_role) are re-set with replace=True every check run but rarely change, so + # this lets the cached tag lists survive across runs. + if self._tags.get(key) == [value]: + return self._tags[key] = [value] - # Invalidate the cache since tags have changed - self._cached_tag_list = None + # Invalidate the caches since tags have changed + self._invalidate_cache() elif value not in self._tags[key]: self._tags[key].append(value) - # Invalidate the cache since tags have changed - self._cached_tag_list = None + # Invalidate the caches since tags have changed + self._invalidate_cache() def set_tags_from_list(self, tag_list: List[str], replace: bool = False, normalize: bool = False) -> None: """ @@ -586,7 +613,7 @@ def set_tags_from_list(self, tag_list: List[str], replace: bool = False, normali """ if replace: self._tags.clear() - self._cached_tag_list = None + self._invalidate_cache() for tag in tag_list: if ':' in tag: @@ -619,8 +646,8 @@ def delete_tag(self, key: Optional[str], value: Optional[str] = None, normalize: if value is None: # Delete the entire key del self._tags[key] - # Invalidate the cache - self._cached_tag_list = None + # Invalidate the caches + self._invalidate_cache() return True else: # Delete specific value if it exists @@ -629,39 +656,58 @@ def delete_tag(self, key: Optional[str], value: Optional[str] = None, normalize: # Clean up empty lists if not self._tags[key]: del self._tags[key] - # Invalidate the cache - self._cached_tag_list = None + # Invalidate the caches + self._invalidate_cache() return True return False - def _generate_tag_strings(self, tags_dict: Dict[Union[str, TagType], List[str]]) -> tuple[str, ...]: + def _rebuild_cache(self) -> None: """ - Generate a tuple of tag strings from a tags dictionary. - Args: - tags_dict (Dict[Union[str, TagType], List[str]]): Dictionary of tags to convert to strings - Returns: - tuple[str, ...]: Tuple of tag strings + Render the tags dict into the three disjoint buckets (base, internal, db) in a single pass. + For key-value tags the rendered form is "key:value"; for keyless tags it's just the value. """ - return tuple( - value if key == self._keyless else f"{key}:{value}" for key, values in tags_dict.items() for value in values - ) - - def get_tags(self) -> List[str]: + base: List[str] = [] + internal: List[str] = [] + db: List[str] = [] + for key, values in self._tags.items(): + if key == self._keyless: + base.extend(values) + continue + if isinstance(key, str) and key.startswith(INTERNAL_TAG_PREFIX): + bucket = internal + elif key == DB_TAG_KEY: + bucket = db + else: + bucket = base + bucket.extend("{}:{}".format(key, value) for value in values) + self._base_tags = tuple(base) + self._internal_tags = tuple(internal) + self._db_tags = tuple(db) + self._cache_valid = True + + def get_tags(self, include_internal: bool = True, include_db: bool = True) -> List[str]: """ Get a list of tag strings. For key-value tags, returns "key:value" format. For keyless tags, returns just the value. - The returned list is always sorted alphabetically. + Args: + include_internal (bool): If False, excludes internal resource tags (keys prefixed with + "dd.internal"). Useful for payloads that don't go through the agent's metric + submission pipeline, such as DBM metadata events. + include_db (bool): If False, excludes the per-database tag (the "db" key). Useful for + instance-level metrics and DBM events where the database is determined per-row or + during ingestion. Returns: - list: Sorted list of tag strings + list: List of tag strings """ - # Return cached list if available - if self._cached_tag_list is not None: - return list(self._cached_tag_list) - - # Generate and cache regular tags - self._cached_tag_list = self._generate_tag_strings(self._tags) - return list(self._cached_tag_list) + if not self._cache_valid: + self._rebuild_cache() + tags = list(self._base_tags) + if include_internal: + tags.extend(self._internal_tags) + if include_db: + tags.extend(self._db_tags) + return tags def now_ms() -> int: diff --git a/datadog_checks_base/tests/base/utils/db/test_util.py b/datadog_checks_base/tests/base/utils/db/test_util.py index d25e3b895c678..4fcb378be812a 100644 --- a/datadog_checks_base/tests/base/utils/db/test_util.py +++ b/datadog_checks_base/tests/base/utils/db/test_util.py @@ -502,46 +502,38 @@ def test_init(self): """Test initialization of TagManager""" tag_manager = TagManager() assert tag_manager._tags == {} - assert tag_manager._cached_tag_list is None + assert tag_manager._cache_valid is False assert tag_manager._keyless == TagType.KEYLESS @pytest.mark.parametrize( - 'key,value,expected_tags', + 'operations,expected_tags', [ - ('test_key', 'test_value', {'test_key': ['test_value']}), - (None, 'test_value', {TagType.KEYLESS: ['test_value']}), + pytest.param([('test_key', 'test_value', False)], {'test_key': ['test_value']}, id='single key-value'), + pytest.param([(None, 'test_value', False)], {TagType.KEYLESS: ['test_value']}, id='single keyless'), + pytest.param( + [('test_key', 'value1', False), ('test_key', 'value2', False)], + {'test_key': ['value1', 'value2']}, + id='append to existing key', + ), + pytest.param( + [('test_key', 'value1', False), ('test_key', 'value2', True)], + {'test_key': ['value2']}, + id='replace existing key', + ), + pytest.param( + [('test_key', 'test_value', False), ('test_key', 'test_value', False)], + {'test_key': ['test_value']}, + id='duplicate value ignored', + ), ], ) - def test_set_tag(self, key, value, expected_tags): - """Test setting tags with various combinations of key and value""" + def test_set_tag(self, operations, expected_tags): + """Test setting tags across single, keyless, append, replace, and duplicate scenarios""" tag_manager = TagManager() - tag_manager.set_tag(key, value) + for key, value, replace in operations: + tag_manager.set_tag(key, value, replace=replace) assert tag_manager._tags == expected_tags - assert tag_manager._cached_tag_list is None - - def test_set_tag_existing_key_append(self): - """Test appending a value to an existing key""" - tag_manager = TagManager() - tag_manager.set_tag('test_key', 'value1') - tag_manager.set_tag('test_key', 'value2') - assert tag_manager._tags == {'test_key': ['value1', 'value2']} - assert tag_manager._cached_tag_list is None - - def test_set_tag_existing_key_replace(self): - """Test replacing values for an existing key""" - tag_manager = TagManager() - tag_manager.set_tag('test_key', 'value1') - tag_manager.set_tag('test_key', 'value2', replace=True) - assert tag_manager._tags == {'test_key': ['value2']} - assert tag_manager._cached_tag_list is None - - def test_set_tag_duplicate_value(self): - """Test setting a duplicate value for a key""" - tag_manager = TagManager() - tag_manager.set_tag('test_key', 'test_value') - tag_manager.set_tag('test_key', 'test_value') - assert tag_manager._tags == {'test_key': ['test_value']} - assert tag_manager._cached_tag_list is None + assert tag_manager._cache_valid is False @pytest.mark.parametrize( 'key,values,delete_key,delete_value,expected_tags,description', @@ -575,13 +567,13 @@ def test_delete_tag(self, key, values, delete_key, delete_value, expected_tags, # Generate initial cache tag_manager.get_tags() - assert tag_manager._cached_tag_list is not None + assert tag_manager._cache_valid is True # Perform deletion assert tag_manager.delete_tag(delete_key, delete_value) # Verify cache is invalidated - assert tag_manager._cached_tag_list is None + assert tag_manager._cache_valid is False # Verify internal state assert tag_manager._tags == expected_tags @@ -633,13 +625,13 @@ def test_set_tags_from_list(self, initial_tags, tag_list, replace, expected_tags # Generate cache to ensure it exists _ = tag_manager.get_tags() - assert tag_manager._cached_tag_list is not None + assert tag_manager._cache_valid is True tag_manager.set_tags_from_list(tag_list, replace=replace) # Verify cache was invalidated when needed if replace: - assert tag_manager._cached_tag_list is None + assert tag_manager._cache_valid is False # Verify final state assert sorted(tag_manager.get_tags()) == sorted(expected_tags) @@ -676,11 +668,11 @@ def test_cache_management(self): # First call should generate cache _ = tag_manager.get_tags() - assert tag_manager._cached_tag_list is not None + assert tag_manager._cache_valid is True # Modify regular tags tag_manager.set_tag('regular_key2', 'regular_value2') - assert tag_manager._cached_tag_list is None + assert tag_manager._cache_valid is False # Verify all tags are included second_result = tag_manager.get_tags() @@ -704,6 +696,128 @@ def test_get_tags_mutation(self): assert new_tags == ['test_key:test_value'] assert new_tags != tags # The lists should be different objects + @pytest.mark.parametrize( + 'include_internal,include_db,expected_tags', + [ + pytest.param( + True, + True, + [ + 'env:prod', + 'db:mydb', + 'dd.internal.resource:database_instance:host1', + 'dd.internal.resource:aws_rds_instance:endpoint', + ], + id='full view', + ), + pytest.param( + False, + True, + ['env:prod', 'db:mydb'], + id='exclude internal', + ), + pytest.param( + True, + False, + [ + 'env:prod', + 'dd.internal.resource:database_instance:host1', + 'dd.internal.resource:aws_rds_instance:endpoint', + ], + id='exclude db', + ), + pytest.param( + False, + False, + ['env:prod'], + id='exclude internal and db', + ), + ], + ) + def test_get_tags_view_filtering(self, include_internal, include_db, expected_tags): + """Test that include_internal/include_db filter the dd.internal.* and db buckets""" + tag_manager = TagManager() + tag_manager.set_tag('env', 'prod') + tag_manager.set_tag('db', 'mydb') + tag_manager.set_tag('dd.internal.resource', 'database_instance:host1') + tag_manager.set_tag('dd.internal.resource', 'aws_rds_instance:endpoint') + + assert sorted(tag_manager.get_tags(include_internal=include_internal, include_db=include_db)) == sorted( + expected_tags + ) + + def test_get_tags_exclude_db_is_exact_key_match(self): + """A key like "dbms_flavor" must NOT be excluded by the db filter (exact-key match, not prefix)""" + tag_manager = TagManager() + tag_manager.set_tag('db', 'mydb') + tag_manager.set_tag('dbms_flavor', 'postgres') + + tags = tag_manager.get_tags(include_db=False) + assert 'db:mydb' not in tags + assert 'dbms_flavor:postgres' in tags + + def test_get_tags_filtering_reflects_mutations(self): + """Test that filtered views reflect subsequent mutations (cache invalidation)""" + tag_manager = TagManager() + tag_manager.set_tag('env', 'prod') + tag_manager.set_tag('dd.internal.resource', 'database_instance:host1') + + assert tag_manager.get_tags(include_internal=False) == ['env:prod'] + + # Reading a filtered view must not affect the full view + assert sorted(tag_manager.get_tags()) == sorted(['env:prod', 'dd.internal.resource:database_instance:host1']) + + tag_manager.set_tag('service', 'web') + assert sorted(tag_manager.get_tags(include_internal=False)) == sorted(['env:prod', 'service:web']) + + def test_set_tag_replace_noop_preserves_cache(self): + """Re-setting a tag with replace=True and the same value must not invalidate the cache""" + tag_manager = TagManager() + tag_manager.set_tag('env', 'prod') + tag_manager.set_tag('db', 'mydb') + + # Build the rendered cache + tag_manager.get_tags() + assert tag_manager._cache_valid is True + + # Re-setting an existing tag to the same value via replace is a no-op: cache survives + tag_manager.set_tag('env', 'prod', replace=True) + assert tag_manager._cache_valid is True + + # Setting a tag with a different value via replace still invalidates + tag_manager.set_tag('env', 'staging', replace=True) + assert tag_manager._cache_valid is False + assert 'env:staging' in tag_manager.get_tags() + assert 'env:prod' not in tag_manager.get_tags() + + def test_set_tag_replace_noop_collapses_multivalue(self): + """replace with the same single value is a no-op, but replacing a multi-value key is not""" + tag_manager = TagManager() + tag_manager.set_tag('team', 'a') + tag_manager.set_tag('team', 'b') # team now has two values + tag_manager.get_tags() + assert tag_manager._cache_valid is True + + # Replacing the multi-value key with a single value must invalidate and collapse it + tag_manager.set_tag('team', 'a', replace=True) + assert tag_manager._cache_valid is False + assert tag_manager._tags['team'] == ['a'] + + def test_filtered_view_cache(self): + """Test that the rendered buckets are cached and invalidated on mutation""" + tag_manager = TagManager() + tag_manager.set_tag('env', 'prod') + tag_manager.set_tag('db', 'mydb') + + assert tag_manager._cache_valid is False + assert tag_manager.get_tags(include_db=False) == ['env:prod'] + assert tag_manager._cache_valid is True + + # Mutating tags invalidates the cache + tag_manager.set_tag('service', 'web') + assert tag_manager._cache_valid is False + assert sorted(tag_manager.get_tags(include_db=False)) == sorted(['env:prod', 'service:web']) + # Normalization tests def mock_tag_normalizer(self, tag): """Mock normalizer that replaces spaces and hyphens with underscores and lowercases""" @@ -714,7 +828,7 @@ def test_init_with_normalizer(self): tag_manager = TagManager(normalizer=self.mock_tag_normalizer) assert tag_manager._normalizer == self.mock_tag_normalizer assert tag_manager._tags == {} - assert tag_manager._cached_tag_list is None + assert tag_manager._cache_valid is False def test_set_tag_with_normalization(self): """Test setting tags with normalization enabled""" @@ -741,50 +855,36 @@ def test_set_tag_without_normalization(self): tag_manager.set_tag('another-key', 'another value') assert tag_manager._tags == {'test-key': ['value with spaces'], 'another-key': ['another value']} - def test_set_tags_from_list_with_normalization(self): - """Test setting tags from list with normalization enabled""" - tag_manager = TagManager(normalizer=self.mock_tag_normalizer) - - tag_list = ['env:prod-test', 'service:web app', 'keyless-tag'] - tag_manager.set_tags_from_list(tag_list, normalize=True) - - expected_tags = sorted(['env:prod_test', 'service:web_app', 'keyless_tag']) - assert sorted(tag_manager.get_tags()) == expected_tags - - def test_set_tags_from_list_without_normalization(self): - """Test setting tags from list without normalization""" - tag_manager = TagManager(normalizer=self.mock_tag_normalizer) - - tag_list = ['env:prod-test', 'service:web app', 'keyless-tag'] - tag_manager.set_tags_from_list(tag_list, normalize=False) - - expected_tags = sorted(['env:prod-test', 'service:web app', 'keyless-tag']) - assert sorted(tag_manager.get_tags()) == expected_tags - - def test_delete_tag_with_normalization(self): - """Test deleting tags with normalization enabled""" + @pytest.mark.parametrize( + 'normalize,expected_tags', + [ + pytest.param(True, ['env:prod_test', 'service:web_app', 'keyless_tag'], id='normalize'), + pytest.param(False, ['env:prod-test', 'service:web app', 'keyless-tag'], id='no normalize'), + ], + ) + def test_set_tags_from_list_normalization(self, normalize, expected_tags): + """Test setting tags from a list with and without normalization""" tag_manager = TagManager(normalizer=self.mock_tag_normalizer) - # Set tag with normalization (only value normalized) - tag_manager.set_tag('test-key', 'value with spaces', normalize=True) - assert 'test-key' in tag_manager._tags - assert tag_manager._tags['test-key'] == ['value_with_spaces'] + tag_manager.set_tags_from_list(['env:prod-test', 'service:web app', 'keyless-tag'], normalize=normalize) - # Delete with normalization - should find the normalized value - result = tag_manager.delete_tag('test-key', 'value with spaces', normalize=True) - assert result is True - assert tag_manager._tags == {} + assert sorted(tag_manager.get_tags()) == sorted(expected_tags) - def test_delete_tag_without_normalization(self): - """Test deleting tags without normalization""" + @pytest.mark.parametrize( + 'normalize,stored_value', + [ + pytest.param(True, 'value_with_spaces', id='normalize'), + pytest.param(False, 'value with spaces', id='no normalize'), + ], + ) + def test_delete_tag_normalization(self, normalize, stored_value): + """Test setting then deleting a tag with and without normalization""" tag_manager = TagManager(normalizer=self.mock_tag_normalizer) - # Set tag without normalization - tag_manager.set_tag('test-key', 'value with spaces', normalize=False) - assert tag_manager._tags == {'test-key': ['value with spaces']} + tag_manager.set_tag('test-key', 'value with spaces', normalize=normalize) + assert tag_manager._tags == {'test-key': [stored_value]} - # Delete without normalization - result = tag_manager.delete_tag('test-key', 'value with spaces', normalize=False) + result = tag_manager.delete_tag('test-key', 'value with spaces', normalize=normalize) assert result is True assert tag_manager._tags == {}