Skip to content

Commit 2a234ce

Browse files
Fix tag growth between check runs in grpc_check (#2991)
* Fix tag growth between check runs * Sync models * Address review feedback * Fix formatting * Parametrize tests, fix changelog, use f-strings * More f-strings
1 parent f12a916 commit 2a234ce

6 files changed

Lines changed: 346 additions & 26 deletions

File tree

grpc_check/CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
# CHANGELOG - gRPC Check
22

33

4+
## 1.1.1 / 2026-05-05
5+
6+
***Fixed***:
7+
8+
* Fix unbounded growth of the tag list on every collection run (the check aliased the instance `tags` list and appended to it on each run), which caused linear Agent CPU/memory growth (AGENT-16111) ([#2991](https://github.com/DataDog/integrations-extras/pull/2991))
9+
* Reuse a single gRPC channel across check runs instead of opening and closing a channel every interval; RPC header interceptors are built once at init ([#2991](https://github.com/DataDog/integrations-extras/pull/2991))
10+
411
## 1.1.0 / 2026-01-16
512

613
***Added***:
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "1.1.0"
1+
__version__ = "1.2.0"

grpc_check/datadog_checks/grpc_check/check.py

Lines changed: 49 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,33 @@ def __init__(self, name, init_config, instances):
8181
self.ca_cert = self.instance.get("ca_cert", "")
8282
self.secure_channel = self.instance.get("secure_channel", False)
8383
self._validate_configuration()
84-
self.tags = self.instance.get("tags", [])
85-
self.tags.append("grpc_server_address:{}".format(self.grpc_server_address))
86-
self.tags.append("grpc_server_service:{}".format(self.grpc_server_service))
84+
self._base_tags = list(self.instance.get("tags", []))
85+
self._base_tags.append(f"grpc_server_address:{self.grpc_server_address}")
86+
self._base_tags.append(f"grpc_server_service:{self.grpc_server_service}")
87+
88+
self._channel = None
89+
self._intercept_channel = None
90+
self._header_adder_interceptors = self._parse_rcp_headers(self.rpc_header)
91+
92+
def cancel(self):
93+
if self._channel is not None:
94+
try:
95+
self._channel.close()
96+
except Exception as e:
97+
self.log.warning("failed to close gRPC channel: %s", e)
98+
finally:
99+
self._channel = None
100+
self._intercept_channel = None
101+
102+
def _get_channel(self):
103+
if self._channel is None:
104+
self._channel = self._create_channel(self.instance)
105+
return self._channel
106+
107+
def _get_intercept_channel(self):
108+
if self._intercept_channel is None:
109+
self._intercept_channel = grpc.intercept_channel(self._get_channel(), *self._header_adder_interceptors)
110+
return self._intercept_channel
87111

88112
def _validate_configuration(self):
89113
if not self.grpc_server_address:
@@ -128,15 +152,15 @@ def _create_channel(self, instance):
128152
self.log.debug("creating an insecure channel")
129153
return grpc.insecure_channel(self.grpc_server_address)
130154

131-
def _send_healthy(self):
132-
self.gauge("grpc_check.healthy", 1, tags=self.tags)
133-
self.gauge("grpc_check.unhealthy", 0, tags=self.tags)
134-
self.service_check("grpc.healthy", AgentCheck.OK, tags=self.tags)
155+
def _send_healthy(self, tags):
156+
self.gauge("grpc_check.healthy", 1, tags=tags)
157+
self.gauge("grpc_check.unhealthy", 0, tags=tags)
158+
self.service_check("grpc.healthy", AgentCheck.OK, tags=tags)
135159

136-
def _send_unhealthy(self):
137-
self.gauge("grpc_check.healthy", 0, tags=self.tags)
138-
self.gauge("grpc_check.unhealthy", 1, tags=self.tags)
139-
self.service_check("grpc.healthy", AgentCheck.CRITICAL, tags=self.tags)
160+
def _send_unhealthy(self, tags):
161+
self.gauge("grpc_check.healthy", 0, tags=tags)
162+
self.gauge("grpc_check.unhealthy", 1, tags=tags)
163+
self.service_check("grpc.healthy", AgentCheck.CRITICAL, tags=tags)
140164

141165
def check(self, instance):
142166
self.log.debug(
@@ -147,12 +171,10 @@ def check(self, instance):
147171
status_code = grpc.StatusCode.UNKNOWN
148172
response = None
149173
try:
150-
with self._create_channel(instance) as channel:
151-
header_adder_interceptors = self._parse_rcp_headers(self.rpc_header)
152-
intercept_channel = grpc.intercept_channel(channel, *header_adder_interceptors)
153-
health_stub = health_pb2_grpc.HealthStub(intercept_channel)
154-
request = health_pb2.HealthCheckRequest(service=self.grpc_server_service)
155-
response = health_stub.Check(request, timeout=self.timeout)
174+
intercept_channel = self._get_intercept_channel()
175+
health_stub = health_pb2_grpc.HealthStub(intercept_channel)
176+
request = health_pb2.HealthCheckRequest(service=self.grpc_server_service)
177+
response = health_stub.Check(request, timeout=self.timeout)
156178
except grpc.RpcError as e:
157179
status_code = e.code()
158180
details = e.details()
@@ -177,32 +199,34 @@ def check(self, instance):
177199
details,
178200
)
179201
except Exception as e:
180-
self.log.error("failed to check: %s", str(e))
202+
self.log.exception("failed to check: %s", e)
181203

182-
if not response:
183-
self.tags.append("status_code:{}".format(status_code.name))
184-
self._send_unhealthy()
204+
if response is None:
205+
tags = list(self._base_tags)
206+
tags.append(f"status_code:{status_code.name}")
207+
self._send_unhealthy(tags)
185208
return
186209

187-
self.tags.append("status_code:{}".format(grpc.StatusCode.OK.name))
210+
tags = list(self._base_tags)
211+
tags.append(f"status_code:{grpc.StatusCode.OK.name}")
188212
if response.status == health_pb2.HealthCheckResponse.SERVING:
189213
self.log.debug(
190214
"grpc_server_address=%s, grpc_server_service=%s: healthy",
191215
self.grpc_server_address,
192216
self.grpc_server_service,
193217
)
194-
self._send_healthy()
218+
self._send_healthy(tags)
195219
elif response.status == health_pb2.HealthCheckResponse.NOT_SERVING:
196220
self.log.warning(
197221
"grpc_server_address=%s, grpc_server_service=%s: unhealthy",
198222
self.grpc_server_address,
199223
self.grpc_server_service,
200224
)
201-
self._send_unhealthy()
225+
self._send_unhealthy(tags)
202226
else:
203227
self.log.warning(
204228
"grpc_server_address=%s, grpc_server_service=%s: health check response was unknown",
205229
self.grpc_server_address,
206230
self.grpc_server_service,
207231
)
208-
self._send_unhealthy()
232+
self._send_unhealthy(tags)

grpc_check/datadog_checks/grpc_check/config_models/defaults.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ def instance_empty_default_hostname():
2424
return False
2525

2626

27+
def instance_enable_legacy_tags_normalization():
28+
return True
29+
30+
2731
def instance_min_collection_interval():
2832
return 15
2933

grpc_check/datadog_checks/grpc_check/config_models/instance.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class InstanceConfig(BaseModel):
3535
client_key: Optional[str] = None
3636
disable_generic_tags: Optional[bool] = None
3737
empty_default_hostname: Optional[bool] = None
38+
enable_legacy_tags_normalization: Optional[bool] = None
3839
grpc_server_address: str
3940
grpc_server_service: Optional[str] = None
4041
metric_patterns: Optional[MetricPatterns] = None

0 commit comments

Comments
 (0)