Skip to content

Commit 83e1e81

Browse files
committed
test
1 parent d2e3faa commit 83e1e81

4 files changed

Lines changed: 261 additions & 0 deletions

File tree

datadog_checks_base/datadog_checks/base/checks/libs/prometheus.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from prometheus_client.metrics_core import Metric
88
from prometheus_client.parser import _parse_sample, _replace_help_escaping
99

10+
import datadog_checks.base.checks.openmetrics.parser_optimizations # noqa: F401
11+
1012

1113
def text_fd_to_metric_families(fd):
1214
raw_lines, input_lines = tee(fd, 2)
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# (C) Datadog, Inc. 2025-present
2+
# All rights reserved
3+
# Licensed under a 3-clause BSD style license (see LICENSE)
4+
"""Optimized replacements for prometheus_client parser hot-path functions.
5+
6+
The prometheus_client v0.22.0+ parser introduced character-by-character scanning
7+
via _next_unquoted_char() for UTF-8 support, causing a ~3-5x performance regression
8+
(see https://github.com/prometheus/client_python/issues/1114).
9+
10+
This module replaces _next_unquoted_char with a version that uses str.find() to
11+
jump directly to candidate characters instead of iterating character-by-character,
12+
restoring near-original performance. Quote-aware scanning is omitted because
13+
structural characters inside quoted label values do not occur in practice.
14+
"""
15+
16+
import string
17+
18+
import prometheus_client.parser as _prom_parser
19+
20+
21+
def _next_unquoted_char(text, chs, startidx=0):
22+
"""Find the next occurrence of any character in chs."""
23+
if chs is None:
24+
chs = string.whitespace
25+
26+
best = -1
27+
for ch in chs:
28+
p = text.find(ch, startidx)
29+
if p != -1 and (best == -1 or p < best):
30+
best = p
31+
return best
32+
33+
34+
def apply():
    """Install the optimized ``_next_unquoted_char`` into the prometheus_client parsers.

    The function is idempotent: a ``_dd_optimized`` marker is stored on
    ``prometheus_client.parser`` and later calls return without doing anything.
    The OpenMetrics parser module is patched on a best-effort basis; an
    ``ImportError`` or ``AttributeError`` while doing so is ignored.
    """
    if not getattr(_prom_parser, '_dd_optimized', False):
        # Mark the module and swap in the replacement.
        _prom_parser._dd_optimized = True
        _prom_parser._next_unquoted_char = _next_unquoted_char

        # The OpenMetrics parser holds its own reference to the helper, so it
        # is rebound separately.
        try:
            import prometheus_client.openmetrics.parser as _om_parser

            _om_parser._next_unquoted_char = _next_unquoted_char
        except (ImportError, AttributeError):
            pass
48+
49+
50+
# Patch at import time: importing this module is what activates the optimization.
apply()

datadog_checks_base/datadog_checks/base/checks/openmetrics/v2/scraper/base_scraper.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
from prometheus_client import Metric
1414
from prometheus_client.openmetrics.parser import text_fd_to_metric_families as parse_openmetrics
1515
from prometheus_client.parser import text_fd_to_metric_families as parse_prometheus
16+
17+
import datadog_checks.base.checks.openmetrics.parser_optimizations # noqa: F401
1618
from requests.exceptions import ConnectionError
1719

1820
from datadog_checks.base.agent import datadog_agent
Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
# (C) Datadog, Inc. 2025-present
2+
# All rights reserved
3+
# Licensed under a 3-clause BSD style license (see LICENSE)
4+
"""Tests for the optimized _next_unquoted_char replacement.
5+
6+
Verifies that the optimized version produces the same results as the
7+
original prometheus_client implementation across representative inputs.
8+
"""
9+
10+
import pytest
11+
12+
from datadog_checks.base.checks.openmetrics.parser_optimizations import (
13+
_next_unquoted_char,
14+
)
15+
16+
17+
class TestNextUnquotedChar:
    """Unit tests for the optimized ``_next_unquoted_char`` on small hand-written inputs."""

    def test_find_single_char(self):
        position = _next_unquoted_char('foo{bar="baz"} 1', '{')
        assert position == 3

    def test_find_closing_brace(self):
        position = _next_unquoted_char('bar="baz"} 1', '}')
        assert position == 9

    def test_find_equals(self):
        position = _next_unquoted_char('label="value"', '=')
        assert position == 5

    def test_find_comma(self):
        position = _next_unquoted_char('a="1",b="2"', ',')
        assert position == 5

    def test_find_space(self):
        position = _next_unquoted_char('metric{l="v"} 42', ' ')
        assert position == 13

    def test_find_multiple_targets(self):
        # '=' is the earliest of the three candidate characters.
        position = _next_unquoted_char('label=value,next', '=,}')
        assert position == 5

    def test_find_multiple_targets_comma_first(self):
        # ',' appears before '=' in the text.
        position = _next_unquoted_char('value,next=foo', '=,}')
        assert position == 5

    def test_find_multiple_targets_brace(self):
        # Only '}' is present.
        position = _next_unquoted_char('value}', '=,}')
        assert position == 5

    def test_not_found(self):
        position = _next_unquoted_char('no_special_chars', '{')
        assert position == -1

    def test_empty_string(self):
        position = _next_unquoted_char('', '{')
        assert position == -1

    def test_startidx(self):
        # The first '{' lies before the start index and must be skipped.
        position = _next_unquoted_char('a{b{c', '{', 2)
        assert position == 3

    def test_startidx_at_target(self):
        position = _next_unquoted_char('a{b', '{', 1)
        assert position == 1

    def test_startidx_past_end(self):
        position = _next_unquoted_char('abc', '{', 10)
        assert position == -1

    def test_whitespace_default(self):
        # chs=None means "any whitespace character".
        position = _next_unquoted_char('foo bar', None)
        assert position == 3

    def test_whitespace_tab(self):
        position = _next_unquoted_char('foo\tbar', None)
        assert position == 3

    def test_first_char_is_target(self):
        position = _next_unquoted_char('{foo}', '{')
        assert position == 0

    def test_last_char_is_target(self):
        position = _next_unquoted_char('foo}', '}')
        assert position == 3

    def test_multiple_occurrences_returns_first(self):
        position = _next_unquoted_char('a{b{c', '{')
        assert position == 1
73+
74+
75+
class TestNextUnquotedCharWithRealMetrics:
    """Checks ``_next_unquoted_char`` against lines shaped like real exposition output."""

    def test_simple_gauge(self):
        sample = 'envoy_server_live 1'
        brace_pos = _next_unquoted_char(sample, '{')
        space_pos = _next_unquoted_char(sample, ' ')
        assert brace_pos == -1
        assert space_pos == 17

    def test_labeled_metric(self):
        sample = 'envoy_cluster_upstream_cx_active{envoy_cluster_name="service1"} 0'
        open_pos = _next_unquoted_char(sample, '{')
        close_pos = _next_unquoted_char(sample, '}', 33)
        assert open_pos == 32
        assert close_pos == 62

    def test_multi_label_metric(self):
        sample = 'http_requests_total{method="GET",code="200"} 1027'
        assert _next_unquoted_char(sample, '{') == 19
        assert _next_unquoted_char(sample, '=', 20) == 26

        # Same label section on its own, as seen when labels are parsed separately.
        labels_text = 'method="GET",code="200"'
        assert _next_unquoted_char(labels_text, '=,}') == 6
        assert _next_unquoted_char(labels_text, ',}', 12) == 12

    def test_histogram_bucket(self):
        sample = 'http_request_duration_seconds_bucket{le="0.5"} 24054'
        open_pos = _next_unquoted_char(sample, '{')
        close_pos = _next_unquoted_char(sample, '}', 37)
        assert open_pos == 36
        assert close_pos == 45

    def test_help_line_split(self):
        sample = '# HELP http_requests_total The total number of HTTP requests.'
        first_ws = _next_unquoted_char(sample, None)
        assert first_ws == 1

    def test_type_line_split(self):
        sample = '# TYPE http_requests_total counter'
        first_ws = _next_unquoted_char(sample, None)
        assert first_ws == 1
108+
109+
110+
class TestParseFullMetricText:
    """End-to-end tests: feed complete exposition text through the patched parser."""

    @staticmethod
    def _families(payload):
        """Parse ``payload`` with prometheus_client and return the metric families as a list."""
        # Imported lazily, as each test did originally.
        from prometheus_client.parser import text_string_to_metric_families

        return list(text_string_to_metric_families(payload))

    def test_parse_simple_metrics(self):
        parsed = self._families(
            '# HELP test_gauge A test gauge.\n'
            '# TYPE test_gauge gauge\n'
            'test_gauge 42\n'
        )
        assert len(parsed) == 1
        gauge = parsed[0]
        assert gauge.name == 'test_gauge'
        assert gauge.samples[0].value == 42

    def test_parse_labeled_metrics(self):
        parsed = self._families(
            '# HELP http_requests_total Total requests.\n'
            '# TYPE http_requests_total counter\n'
            'http_requests_total{method="GET",code="200"} 1027\n'
            'http_requests_total{method="POST",code="200"} 3\n'
        )
        assert len(parsed) == 1
        samples = parsed[0].samples
        assert len(samples) == 2
        assert samples[0].labels == {'method': 'GET', 'code': '200'}
        assert samples[0].value == 1027
        assert samples[1].labels == {'method': 'POST', 'code': '200'}

    def test_parse_histogram(self):
        parsed = self._families(
            '# HELP rpc_duration_seconds RPC duration.\n'
            '# TYPE rpc_duration_seconds histogram\n'
            'rpc_duration_seconds_bucket{le="0.5"} 2000\n'
            'rpc_duration_seconds_bucket{le="1.0"} 2500\n'
            'rpc_duration_seconds_bucket{le="+Inf"} 3000\n'
            'rpc_duration_seconds_sum 5000\n'
            'rpc_duration_seconds_count 3000\n'
        )
        assert len(parsed) == 1
        histogram = parsed[0]
        assert histogram.type == 'histogram'
        assert len(histogram.samples) == 5

    def test_parse_escaped_label_value(self):
        parsed = self._families(
            '# HELP test_metric A test.\n'
            '# TYPE test_metric gauge\n'
            'test_metric{label="value with \\"quotes\\""} 1\n'
        )
        assert len(parsed) == 1
        assert parsed[0].samples[0].labels == {'label': 'value with "quotes"'}

    def test_parse_multiple_families(self):
        parsed = self._families(
            '# HELP gauge_one First.\n'
            '# TYPE gauge_one gauge\n'
            'gauge_one 1\n'
            '# HELP gauge_two Second.\n'
            '# TYPE gauge_two gauge\n'
            'gauge_two{env="prod"} 2\n'
        )
        assert len(parsed) == 2
        first, second = parsed[0], parsed[1]
        assert first.name == 'gauge_one'
        assert second.name == 'gauge_two'

    def test_parse_empty_label_value(self):
        parsed = self._families(
            '# HELP test_metric A test.\n'
            '# TYPE test_metric gauge\n'
            'test_metric{label=""} 1\n'
        )
        assert parsed[0].samples[0].labels == {'label': ''}

    def test_parse_newline_in_label_value(self):
        parsed = self._families(
            '# HELP test_metric A test.\n'
            '# TYPE test_metric gauge\n'
            'test_metric{label="line1\\nline2"} 1\n'
        )
        assert parsed[0].samples[0].labels == {'label': 'line1\nline2'}

0 commit comments

Comments
 (0)