Skip to content

Commit 57d88cc

Browse files
Евгений Блинов
authored and committed
Fix histogram
1 parent c85c346 commit 57d88cc

5 files changed

Lines changed: 234 additions & 31 deletions

File tree

microbenchmark/_render.py

Lines changed: 74 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,19 @@
33
import shutil
44
from collections.abc import Sequence
55

6+
# Fill glyphs indexed by eighths of a character cell: index 0 is a blank
# cell and index 8 is the full block.
_BLOCKS = ' ▁▂▃▄▅▆▇█'
7+
8+
9+
def _format_duration(t: float) -> str:
10+
"""Format *t* seconds into a compact human-readable string."""
11+
if t >= 1.0:
12+
return f'{t:.3f}s'
13+
if t >= 1e-3:
14+
return f'{t * 1e3:.2f}ms'
15+
if t >= 1e-6:
16+
return f'{t * 1e6:.2f}\u03bcs'
17+
return f'{t * 1e9:.2f}ns'
18+
619

720
def terminal_width() -> int:
821
"""Return the current terminal width, clamped to a minimum of 20."""
@@ -77,22 +90,58 @@ def pad_inner_line(line: str) -> str:
7790
return result
7891

7992

93+
def histogram_bounds(durations: Sequence[float]) -> tuple[float, float]:
    """Compute the ``(lo, hi)`` value range a histogram should span.

    ``lo`` is the smallest duration.  ``hi`` is the element at index
    ``int(n * 0.99)`` of the sorted data (capped at the last index), i.e. a
    p99 clip that keeps rare extreme outliers from squeezing the bulk of
    the distribution into the leftmost column.

    Args:
        durations: Sequence of per-call timings in seconds.

    Returns:
        A ``(lo, hi)`` tuple taken from the sorted timings.

    Raises:
        IndexError: If *durations* is empty.
    """
    ordered = list(durations)
    ordered.sort()
    count = len(ordered)
    # Cap at the final index so small samples fall back to the maximum.
    clip_index = min(count - 1, int(count * 0.99))
    return ordered[0], ordered[clip_index]
106+
107+
108+
def draw_histogram_axis(lo: float, hi: float, width: int) -> str:
    """Return a single-line x-axis label that is exactly *width* characters wide.

    The formatted minimum is left-aligned and the formatted upper bound,
    tagged ``(p99)``, is right-aligned, with spaces filling the gap.  Both
    values are rendered by :func:`_format_duration`, which picks the time
    unit (ns / μs / ms / s) automatically.

    When the two labels cannot fit with at least one space between them,
    only the left label is shown: truncated to *width* if it is too long,
    otherwise padded with trailing spaces so the result still spans the
    full *width*, the same as in the normal case.

    Args:
        lo: Minimum value displayed on the x-axis.
        hi: Maximum value (p99 clip point) displayed on the x-axis.
        width: Total width of the label in characters.

    Returns:
        A string of length *width*, or ``''`` when *width* is below 1.
    """
    if width < 1:
        return ''
    left = _format_duration(lo)
    right = _format_duration(hi) + ' (p99)'
    gap = width - len(left) - len(right)
    if gap < 1:
        # Not enough room for both labels plus a separating space.
        return left[:width].ljust(width)
    return left + ' ' * gap + right
127+
128+
80129
def draw_histogram(durations: Sequence[float], width: int, height: int) -> list[str]:
81130
"""Render an ASCII bar chart of *durations* as a ``height`` x ``width`` grid.
82131
83-
The output is ``width`` characters wide and ``height`` rows tall. Filled
84-
cells use ``'█'``; empty cells use ``' '``. Returns an empty list when any
85-
dimension is zero/negative or when *durations* is empty.
132+
The output is ``width`` characters wide and ``height`` rows tall. Each
133+
cell uses one of the Unicode block characters ``' ▁▂▃▄▅▆▇█'`` so bar
134+
heights are resolved at 1/8-row precision, giving smooth transitions
135+
between adjacent buckets. Returns an empty list when any dimension is
136+
zero/negative or when *durations* is empty.
86137
87138
Internally the data is bucketed into at most 20 bins regardless of
88139
*width*. Each bin is then rendered as ``width // n_buckets`` characters
89-
wide. This prevents timer-quantisation artefacts (where most measurements
90-
snap to a handful of discrete nanosecond values) from producing a single
91-
column spike with stray isolated pixels elsewhere.
140+
wide. This prevents timer-quantisation artefacts from producing a single
141+
spike with stray isolated pixels.
92142
93-
The x-axis upper bound is clipped to the p99 value to prevent extreme
94-
outliers from compressing the bulk of the distribution. Values above the
95-
p99 clip point are omitted from buckets.
143+
The x-axis upper bound is clipped to the p99 value via
144+
:func:`histogram_bounds`. Values above the clip point are omitted.
96145
97146
When all values are identical (``hi == lo``), the middle bin is drawn
98147
full-height and all others are left empty.
@@ -105,14 +154,10 @@ def draw_histogram(durations: Sequence[float], width: int, height: int) -> list[
105154
if width < 1 or height < 1 or len(durations) == 0:
106155
return []
107156

108-
sorted_durs = sorted(durations)
109-
lo = sorted_durs[0]
110-
p99_idx = min(len(sorted_durs) - 1, int(len(sorted_durs) * 0.99))
111-
hi = sorted_durs[p99_idx]
157+
lo, hi = histogram_bounds(durations)
112158

113-
# Cap the number of data buckets at 20 so that each bucket spans a
114-
# visible fraction of the output width and adjacent quantised values are
115-
# merged into the same bar.
159+
# Cap the number of data buckets at 20 so that adjacent quantised timer
160+
# values are merged into the same wider bar.
116161
n_buckets = min(width, 20)
117162
counts = [0] * n_buckets
118163

@@ -127,16 +172,22 @@ def draw_histogram(durations: Sequence[float], width: int, height: int) -> list[
127172
counts[idx] += 1
128173

129174
max_count = max(counts)
130-
bar_heights = [
131-
round(c / max_count * height) if c > 0 else 0
175+
bar_heights_float = [
176+
c / max_count * height if c > 0 else 0.0
132177
for c in counts
133178
]
134179

135180
rows: list[str] = []
136181
for row in range(height - 1, -1, -1):
137-
line = ''.join(
138-
'█' if bar_heights[col * n_buckets // width] > row else ' '
139-
for col in range(width)
140-
)
141-
rows.append(line)
182+
line_chars: list[str] = []
183+
for col in range(width):
184+
bh = bar_heights_float[col * n_buckets // width]
185+
if bh >= row + 1:
186+
line_chars.append('█')
187+
elif bh > row:
188+
frac = bh - row
189+
line_chars.append(_BLOCKS[max(1, min(8, round(frac * 8)))])
190+
else:
191+
line_chars.append(' ')
192+
rows.append(''.join(line_chars))
142193
return rows

microbenchmark/scenario.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,13 @@
99
from printo.reprs import superrepr
1010
from sigmatch import SignatureMismatchError
1111

12-
from microbenchmark._render import draw_box, draw_histogram, terminal_width
12+
from microbenchmark._render import (
13+
draw_box,
14+
draw_histogram,
15+
draw_histogram_axis,
16+
histogram_bounds,
17+
terminal_width,
18+
)
1319
from microbenchmark.arguments import arguments as Arguments # noqa: N812
1420
from microbenchmark.benchmark_result import BenchmarkResult
1521

@@ -105,8 +111,11 @@ def cli(self, argv: list[str] | None = None) -> None:
105111
width = terminal_width()
106112
lines = _render_result(result)
107113
if cli_args.histogram:
114+
hist_width = width - 4
108115
lines.append('')
109-
lines.extend(draw_histogram(list(result.durations), width - 4, 8))
116+
lines.extend(draw_histogram(list(result.durations), hist_width, 8))
117+
lo, hi = histogram_bounds(result.durations)
118+
lines.append(draw_histogram_axis(lo, hi, hist_width))
110119
box = draw_box(lines, width)
111120
sys.stdout.write('\n'.join(box) + '\n')
112121

microbenchmark/scenario_group.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,14 @@
33
import argparse
44
import sys
55

6-
from microbenchmark._render import draw_box, draw_histogram, draw_nested, terminal_width
6+
from microbenchmark._render import (
7+
draw_box,
8+
draw_histogram,
9+
draw_histogram_axis,
10+
draw_nested,
11+
histogram_bounds,
12+
terminal_width,
13+
)
714
from microbenchmark.benchmark_result import BenchmarkResult
815
from microbenchmark.scenario import Scenario, _render_result
916

@@ -51,8 +58,11 @@ def cli(self, argv: list[str] | None = None) -> None:
5158
results.append(result)
5259
lines = _render_result(result)
5360
if cli_args.histogram:
61+
hist_width = inner_width - 4
5462
lines.append('')
55-
lines.extend(draw_histogram(list(result.durations), inner_width - 4, 8))
63+
lines.extend(draw_histogram(list(result.durations), hist_width, 8))
64+
lo, hi = histogram_bounds(result.durations)
65+
lines.append(draw_histogram_axis(lo, hi, hist_width))
5666
inner_blocks.append(draw_box(lines, inner_width))
5767

5868
nested = draw_nested(inner_blocks, [], width)

tests/typing/test_render_types.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,14 @@
22

33
import pytest
44

5-
from microbenchmark._render import draw_box, draw_histogram, draw_nested, terminal_width
5+
from microbenchmark._render import (
6+
draw_box,
7+
draw_histogram,
8+
draw_histogram_axis,
9+
draw_nested,
10+
histogram_bounds,
11+
terminal_width,
12+
)
613

714

815
@pytest.mark.mypy_testing
@@ -27,3 +34,15 @@ def test_draw_nested_returns_list_of_str():
2734
def test_draw_histogram_returns_list_of_str():
2835
result = draw_histogram([0.001, 0.002], 10, 4)
2936
reveal_type(result) # N: Revealed type is "builtins.list[builtins.str]"
37+
38+
39+
@pytest.mark.mypy_testing
def test_histogram_bounds_returns_tuple():
    # Static-typing check: the revealed type of the bounds must be a pair of
    # floats.  The trailing "# N:" comment is the expected mypy note.
    result = histogram_bounds([0.001, 0.002])
    reveal_type(result)  # N: Revealed type is "tuple[builtins.float, builtins.float]"
43+
44+
45+
@pytest.mark.mypy_testing
def test_draw_histogram_axis_returns_str():
    # Static-typing check: the axis label must be revealed as ``str``.  The
    # trailing "# N:" comment is the expected mypy note.
    result = draw_histogram_axis(0.000005, 0.000006, 40)
    reveal_type(result)  # N: Revealed type is "builtins.str"

tests/units/test_render.py

Lines changed: 117 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,14 @@
66
import sys
77
import textwrap
88

9-
from microbenchmark._render import draw_box, draw_histogram, draw_nested, terminal_width
9+
from microbenchmark._render import (
10+
draw_box,
11+
draw_histogram,
12+
draw_histogram_axis,
13+
draw_nested,
14+
histogram_bounds,
15+
terminal_width,
16+
)
1017

1118
_UTF8_ENV = {**os.environ, 'PYTHONUTF8': '1'}
1219

@@ -253,6 +260,100 @@ def fake_timer():
253260
assert proc.stdout.count('╰') >= 3
254261

255262

263+
# ---------------------------------------------------------------------------
264+
# histogram_bounds
265+
# ---------------------------------------------------------------------------
266+
267+
268+
def test_histogram_bounds_returns_tuple():
    """The result unpacks into two values and both are floats."""
    lower, upper = histogram_bounds([0.001, 0.002, 0.003])

    for bound in (lower, upper):
        assert isinstance(bound, float)
273+
274+
275+
def test_histogram_bounds_lo_is_min():
    """The lower bound is the smallest input, whatever the input order."""
    unsorted = [0.003, 0.001, 0.002]

    lower, _upper = histogram_bounds(unsorted)

    assert lower == 0.001
279+
280+
281+
def test_histogram_bounds_hi_is_p99():
    """The upper bound is the sorted element at index ``int(n * 0.99)``."""
    # 200 ascending values, so int(200 * 0.99) = 198 selects samples[198].
    samples = [step * 0.001 for step in range(1, 201)]

    _lower, upper = histogram_bounds(samples)

    assert upper == samples[198]
287+
288+
289+
def test_histogram_bounds_single_value():
    """A lone sample serves as both the minimum and the clip point."""
    lower, upper = histogram_bounds([0.005])

    assert lower == upper
    assert upper == 0.005
293+
294+
295+
# ---------------------------------------------------------------------------
296+
# draw_histogram_axis
297+
# ---------------------------------------------------------------------------
298+
299+
300+
def test_draw_histogram_axis_returns_str():
    """The axis label comes back as a plain ``str``."""
    axis = draw_histogram_axis(0.000005, 0.000006, 40)

    assert isinstance(axis, str)
304+
305+
306+
def test_draw_histogram_axis_width_matches():
    """With room for both labels, the axis is exactly the requested width."""
    requested = 40

    axis = draw_histogram_axis(0.000005, 0.000006, requested)

    assert len(axis) == requested
311+
312+
313+
def test_draw_histogram_axis_contains_left_label():
    """The formatted minimum (5 μs here) appears in the axis line."""
    expected_left = '5.00\u03bcs'

    axis = draw_histogram_axis(0.000005, 0.000006, 40)

    assert expected_left in axis
317+
318+
319+
def test_draw_histogram_axis_contains_right_label():
    """The axis shows both the p99 tag and the formatted upper bound."""
    axis = draw_histogram_axis(0.000005, 0.000006, 40)

    for fragment in ('p99', '6.00\u03bcs'):
        assert fragment in axis
324+
325+
326+
def test_draw_histogram_axis_units_ns():
    """Sub-microsecond bounds are labelled in nanoseconds."""
    axis = draw_histogram_axis(1e-10, 2e-10, 40)

    assert axis.find('ns') != -1
330+
331+
332+
def test_draw_histogram_axis_units_ms():
    """Bounds of a few thousandths of a second are labelled in milliseconds."""
    axis = draw_histogram_axis(0.001, 0.002, 40)

    assert axis.find('ms') != -1
336+
337+
338+
def test_draw_histogram_axis_units_s():
    """Bounds of at least one second are labelled in seconds."""
    result = draw_histogram_axis(1.5, 2.0, 40)

    # A bare ``'s' in result`` would also be satisfied by 'ms', 'μs' or
    # 'ns', so pin the complete labels instead.
    assert result.startswith('1.500s')
    assert result.endswith('2.000s (p99)')
342+
343+
344+
def test_draw_histogram_axis_zero_width_returns_empty():
    """A zero-width request yields the empty string."""
    axis = draw_histogram_axis(0.000005, 0.000006, 0)

    assert axis == ''
348+
349+
350+
def test_draw_histogram_axis_truncates_when_too_narrow():
    """A width too small for both labels falls back to the left label only."""
    # 5 columns cannot hold the left label, a space and the right label, so
    # only the left label, cut to 5 characters, is expected.
    narrow = 5

    axis = draw_histogram_axis(0.000005, 0.000006, narrow)

    assert len(axis) == narrow
355+
356+
256357
# ---------------------------------------------------------------------------
257358
# draw_histogram
258359
# ---------------------------------------------------------------------------
@@ -338,12 +439,25 @@ def test_draw_histogram_all_equal_height_matches():
338439
assert all(len(row) == 11 for row in result)
339440

340441

341-
def test_draw_histogram_only_block_or_space():
442+
def test_draw_histogram_only_valid_chars():
    """Every cell is a space, one of seven partial blocks, or the full block."""
    allowed = set(' \u2581\u2582\u2583\u2584\u2585\u2586\u2587\u2588')

    rendered = draw_histogram([0.001, 0.002, 0.003], 10, 4)

    assert all(cell in allowed for line in rendered for cell in line)
450+
451+
452+
def test_draw_histogram_partial_blocks_appear():
    """Unequal bucket counts produce at least one partial-block glyph."""
    # Intended setup, per the author's note: four samples land in the first
    # bucket and three in the last, so with height=5 the shorter bar should
    # reach 3/4 * 5 = 3.75 rows and its top row is drawn as a partial block.
    partial_glyphs = set('\u2581\u2582\u2583\u2584\u2585\u2586\u2587')

    rows = draw_histogram([0.001] * 4 + [0.003] * 3, 10, 5)

    assert partial_glyphs.intersection(''.join(rows))
347461

348462

349463
def test_draw_histogram_strings_in_list():

0 commit comments

Comments
 (0)