Skip to content

Commit 177c34b

Browse files
committed
Allow memray stats to output a JSON report
Signed-off-by: Hao Sun <helioeccentric@gmail.com>
1 parent 16b2a50 commit 177c34b

6 files changed

Lines changed: 241 additions & 20 deletions

File tree

docs/stats.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ The output includes the following:
2020

2121
* Stack trace and **count** of the top 'n' largest allocating locations by number of allocations (*default: 5*, configurable with the ``-n`` command line param)
2222

23+
* (for JSON output only) Metadata about the tracked process
24+
2325
Basic Usage
2426
-----------
2527

news/377.feature.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Allow ``memray stats`` to output a JSON report via the ``--json`` flag.

src/memray/_stats.pyi

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from dataclasses import dataclass
22

3-
from ._memray import AllocatorType
43
from ._memray import PythonStackElement
54
from ._metadata import Metadata
65

@@ -11,6 +10,6 @@ class Stats:
1110
total_memory_allocated: int
1211
peak_memory_allocated: int
1312
allocation_count_by_size: dict[int, int]
14-
allocation_count_by_allocator: dict[AllocatorType, int]
13+
allocation_count_by_allocator: dict[str, int]
1514
top_locations_by_size: list[tuple[PythonStackElement, int]]
1615
top_locations_by_count: list[tuple[PythonStackElement, int]]

src/memray/commands/stats.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ def valid_positive_int(value: str) -> int:
3333
default=5,
3434
)
3535

36+
parser.add_argument(
37+
"--json",
38+
help="Exports stats to a JSON file",
39+
action="store_true",
40+
default=False,
41+
)
42+
3643
def run(self, args: argparse.Namespace, parser: argparse.ArgumentParser) -> None:
3744
result_path = Path(args.results)
3845
if not result_path.exists() or not result_path.is_file():
@@ -50,4 +57,4 @@ def run(self, args: argparse.Namespace, parser: argparse.ArgumentParser) -> None
5057
)
5158

5259
reporter = StatsReporter(stats, args.num_largest)
53-
reporter.render()
60+
reporter.render(to_json=args.json, result_path=result_path)

src/memray/reporters/stats.py

Lines changed: 69 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
1+
import datetime
2+
import json
13
import math
4+
import re
25
from collections import Counter
6+
from dataclasses import asdict
7+
from pathlib import Path
38
from typing import Dict
49
from typing import Iterator
510
from typing import List
@@ -10,6 +15,8 @@
1015
from memray._memray import size_fmt
1116
from memray._stats import Stats
1217

18+
PythonStackElement = Tuple[str, str, int]
19+
1320

1421
def get_histogram_databins(data: Dict[int, int], bins: int) -> List[Tuple[int, int]]:
1522
if bins <= 0:
@@ -89,7 +96,21 @@ def __init__(self, stats: Stats, num_largest: int):
8996
raise ValueError(f"Invalid input num_largest={num_largest}, should be >=1")
9097
self.num_largest = num_largest
9198

92-
def render(self) -> None:
99+
def render(self, to_json: bool = False, result_path: Path = Path(".")) -> None:
    """Render the statistics to the terminal or to a JSON report file.

    Args:
        to_json: When true, write a JSON report instead of printing to the
            terminal.
        result_path: Path used to derive the location and name of the JSON
            report; ignored when ``to_json`` is false. The report is written
            to the same directory, named ``memray-stats-<name>.json`` where
            ``<name>`` is the file name with any leading ``memray-`` removed.
    """
    histogram_params = dict(
        num_bins=10,
        histogram_scale_factor=25,
    )
    if not to_json:
        self._render_to_terminal(histogram_params)
        return

    # Append ".json" to the full file name instead of replacing its suffix
    # (works better with --follow-fork). The name is built from
    # ``result_path.name`` directly because ``Path.with_suffix`` raises
    # ValueError for a path with an empty name, such as the default Path(".").
    out_name = re.sub(r"^memray-", "", f"{result_path.name}.json")
    out_path = result_path.parent / f"memray-stats-{out_name}"
    self._render_to_json(histogram_params, out_path)
112+
113+
def _render_to_terminal(self, histogram_params: Dict[str, int]) -> None:
93114
rich.print("📏 [bold]Total allocations:[/]")
94115
print(f"\t{self._stats.total_num_allocations}")
95116

@@ -98,35 +119,66 @@ def render(self) -> None:
98119
print(f"\t{size_fmt(self._stats.total_memory_allocated)}")
99120

100121
print()
101-
num_bins = 10
102-
histogram_scale_factor = 25
122+
103123
rich.print("📊 [bold]Histogram of allocation size:[/]")
104124
histogram = draw_histogram(
105125
self._stats.allocation_count_by_size,
106-
num_bins,
107-
hist_scale_factor=histogram_scale_factor,
126+
histogram_params["num_bins"],
127+
hist_scale_factor=histogram_params["histogram_scale_factor"],
108128
)
109129
print(f"\t{histogram}")
110130

111131
print()
112132
rich.print("📂 [bold]Allocator type distribution:[/]")
113-
for entry in self._get_allocator_type_distribution():
114-
print(f"\t {entry}")
133+
for allocator_name, count in self._get_allocator_type_distribution():
134+
print(f"\t {allocator_name}: {count}")
115135

116136
print()
117137
rich.print(
118138
f"🥇 [bold]Top {self.num_largest} largest allocating locations (by size):[/]"
119139
)
120-
for entry in self._get_top_allocations_by_size():
121-
print(f"\t- {entry}")
140+
for location, size in self._get_top_allocations_by_size():
141+
print(f"\t- {self._format_location(location)} -> {size_fmt(size)}")
122142

123143
print()
124144
rich.print(
125145
f"🥇 [bold]Top {self.num_largest} largest allocating "
126146
"locations (by number of allocations):[/]"
127147
)
128-
for entry in self._get_top_allocations_by_count():
129-
print(f"\t- {entry}")
148+
for location, count in self._get_top_allocations_by_count():
149+
print(f"\t- {self._format_location(location)} -> {count}")
150+
151+
def _render_to_json(self, histogram_params: Dict[str, int], out_path: Path) -> None:
    """Write the statistics report as indented JSON to ``out_path``.

    Args:
        histogram_params: Histogram settings; only ``"num_bins"`` is read here.
        out_path: Destination file, opened for writing.
    """
    stats = self._stats

    size_histogram = get_histogram_databins(
        stats.allocation_count_by_size, bins=histogram_params["num_bins"]
    )

    # datetime objects are stored in their str() form so that the metadata
    # dictionary only holds values the json module can serialize.
    metadata = {
        field: str(value) if isinstance(value, datetime.datetime) else value
        for field, value in asdict(stats.metadata).items()
    }

    report = {
        "total_num_allocations": stats.total_num_allocations,
        "total_bytes_allocated": stats.total_memory_allocated,
        "allocation_size_histogram": size_histogram,
        "allocator_type_distribution": list(self._get_allocator_type_distribution()),
        "top_allocations_by_size": [
            {"location": self._format_location(where), "size": nbytes}
            for where, nbytes in self._get_top_allocations_by_size()
        ],
        "top_allocations_by_count": [
            {"location": self._format_location(where), "count": hits}
            for where, hits in self._get_top_allocations_by_count()
        ],
        "metadata": metadata,
    }

    with open(out_path, "w") as out_file:
        json.dump(report, out_file, indent=2)
130182

131183
@staticmethod
132184
def _format_location(loc: Tuple[str, str, int]) -> str:
@@ -135,18 +187,18 @@ def _format_location(loc: Tuple[str, str, int]) -> str:
135187
return "<stack trace unavailable>"
136188
return f"{function}:{file}:{line}"
137189

138-
def _get_top_allocations_by_size(self) -> Iterator[str]:
190+
def _get_top_allocations_by_size(self) -> Iterator[Tuple[PythonStackElement, int]]:
139191
for location, size in self._stats.top_locations_by_size:
140-
yield f"{self._format_location(location)} -> {size_fmt(size)}"
192+
yield (location, size)
141193

142-
def _get_top_allocations_by_count(self) -> Iterator[str]:
194+
def _get_top_allocations_by_count(self) -> Iterator[Tuple[PythonStackElement, int]]:
143195
for location, count in self._stats.top_locations_by_count:
144-
yield f"{self._format_location(location)} -> {count}"
196+
yield (location, count)
145197

146-
def _get_allocator_type_distribution(self) -> Iterator[str]:
198+
def _get_allocator_type_distribution(self) -> Iterator[Tuple[str, int]]:
147199
for allocator_name, count in sorted(
148200
self._stats.allocation_count_by_allocator.items(),
149201
key=lambda item: item[1],
150202
reverse=True,
151203
):
152-
yield f"{allocator_name}: {count}"
204+
yield (allocator_name, count)

tests/unit/test_stats_reporter.py

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
from collections import Counter
2+
from datetime import datetime
23
from typing import List
34
from typing import Optional
45
from typing import Tuple
6+
from unittest.mock import patch
57

68
import pytest
79

810
from memray import AllocatorType as AT
11+
from memray._metadata import Metadata
12+
from memray._stats import Stats
13+
from memray.reporters.stats import StatsReporter
914
from memray.reporters.stats import draw_histogram
1015
from memray.reporters.stats import get_histogram_databins
1116
from tests.utils import MockAllocationRecord
@@ -62,6 +67,62 @@ def _generate_mock_allocations(
6267
return snapshot
6368

6469

70+
# data generator for tests
71+
@pytest.fixture(scope="module")
def fake_stats():
    """Return a hand-built ``Stats`` object with fixed values for reporter tests."""
    allocation_sizes = [
        2500,
        11000,
        11000,
        12000,
        60000,
        65000,
        120000,
        125000,
        125000,
        160000,
        170000,
        180000,
        800000,
        1500000,
    ]
    total_bytes = sum(allocation_sizes)
    largest_allocation = max(allocation_sizes)

    run_metadata = Metadata(
        start_time=datetime(2023, 1, 1, 1),
        end_time=datetime(2023, 1, 1, 2),
        total_allocations=total_bytes,
        total_frames=10,
        peak_memory=largest_allocation,
        command_line="fake stats",
        pid=123456,
        python_allocator="pymalloc",
        has_native_traces=False,
    )

    return Stats(
        metadata=run_metadata,
        total_num_allocations=20,
        total_memory_allocated=total_bytes,
        peak_memory_allocated=largest_allocation,
        allocation_count_by_size=Counter(allocation_sizes),
        # Keyed by allocator name (a string), not by the enum member.
        allocation_count_by_allocator={
            AT.MALLOC.name: 1013,
            AT.REALLOC.name: 797,
            AT.CALLOC.name: 152,
            AT.MMAP.name: 4,
        },
        top_locations_by_count=[
            (("fake_func", "fake.py", 5), 20),
            (("fake_func2", "fake.py", 10), 50),
            (("__main__", "fake.py", 15), 1),
        ],
        top_locations_by_size=[
            (("fake_func", "fake.py", 5), 5 * 2**20),
            (("fake_func2", "fake.py", 10), 3 * 2**10),
            (("__main__", "fake.py", 15), 4),
        ],
    )
124+
125+
65126
# tests begin here
66127
def test_get_histogram_databins():
67128
# GIVEN
@@ -279,3 +340,102 @@ def test_draw_histogram_invalid_input():
279340
# test#3 - Invalid hist_scale_factor value
280341
with pytest.raises(ValueError):
281342
_ = draw_histogram([100, 200, 300], bins=5, hist_scale_factor=0)
343+
344+
345+
def test_stats_output(fake_stats):
    """The terminal report printed for ``fake_stats`` matches the expected text."""
    # GIVEN
    reporter = StatsReporter(fake_stats, 5)
    expected_lines = [
        "📏 [bold]Total allocations:[/]",
        "\t20",
        "",
        "📦 [bold]Total memory allocated:[/]",
        "\t3.187MB",
        "",
        "📊 [bold]Histogram of allocation size:[/]",
        "\tmin: 2.441KB\n"
        "\t----------------------------------------\n"
        "\t< 4.628KB : 1 ▇▇▇▇▇\n"
        "\t< 8.775KB : 0 \n"
        "\t< 16.637KB : 3 ▇▇▇▇▇▇▇▇▇▇▇▇▇\n"
        "\t< 31.542KB : 0 \n"
        "\t< 59.802KB : 1 ▇▇▇▇▇\n"
        "\t< 113.378KB: 1 ▇▇▇▇▇\n"
        "\t< 214.954KB: 6 ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇\n"
        "\t< 407.531KB: 0 \n"
        "\t< 772.638KB: 0 \n"
        "\t<=1.431MB : 2 ▇▇▇▇▇▇▇▇▇\n"
        "\t----------------------------------------\n"
        "\tmax: 1.431MB",
        "",
        "📂 [bold]Allocator type distribution:[/]",
        "\t MALLOC: 1013",
        "\t REALLOC: 797",
        "\t CALLOC: 152",
        "\t MMAP: 4",
        "",
        "🥇 [bold]Top 5 largest allocating locations (by size):[/]",
        "\t- fake_func:fake.py:5 -> 5.000MB",
        "\t- fake_func2:fake.py:10 -> 3.000KB",
        "\t- __main__:fake.py:15 -> 4.000B",
        "",
        "🥇 [bold]Top 5 largest allocating locations (by number of allocations):[/]",
        "\t- fake_func:fake.py:5 -> 20",
        "\t- fake_func2:fake.py:10 -> 50",
        "\t- __main__:fake.py:15 -> 1",
    ]
    expected = "\n".join(expected_lines)

    # WHEN
    # Route both the builtin print and rich.print to the same mock so that
    # every emitted line is recorded in call order.
    with patch("builtins.print") as mocked_print:
        with patch("rich.print", mocked_print):
            reporter.render()

    # THEN
    printed = "\n".join(
        " ".join(recorded[0]) for recorded in mocked_print.call_args_list
    )
    assert expected == printed
391+
392+
393+
def test_stats_output_json(fake_stats, tmp_path):
    """The JSON report for ``fake_stats`` has the expected content and location."""
    # GIVEN
    reporter = StatsReporter(fake_stats, 5)
    # Use a file path inside tmp_path: the report is written next to the
    # given path, so passing the tmp_path directory itself would create the
    # output file in its parent, outside this test's temporary directory.
    result_path = tmp_path / "memray-test.bin"

    # WHEN
    # Only json.dump is patched; the reporter still opens the output file.
    with patch("json.dump") as json_dump:
        reporter.render(to_json=True, result_path=result_path)

    # THEN
    expected = {
        "total_num_allocations": 20,
        "total_bytes_allocated": 3341500,
        "allocation_size_histogram": [
            (4739, 1),
            (8986, 0),
            (17036, 3),
            (32299, 0),
            (61237, 1),
            (116099, 1),
            (220113, 6),
            (417312, 0),
            (791181, 0),
            (1500000, 2),
        ],
        "allocator_type_distribution": [
            ("MALLOC", 1013),
            ("REALLOC", 797),
            ("CALLOC", 152),
            ("MMAP", 4),
        ],
        "top_allocations_by_size": [
            {"location": "fake_func:fake.py:5", "size": 5242880},
            {"location": "fake_func2:fake.py:10", "size": 3072},
            {"location": "__main__:fake.py:15", "size": 4},
        ],
        "top_allocations_by_count": [
            {"location": "fake_func:fake.py:5", "count": 20},
            {"location": "fake_func2:fake.py:10", "count": 50},
            {"location": "__main__:fake.py:15", "count": 1},
        ],
        "metadata": {
            "start_time": "2023-01-01 01:00:00",
            "end_time": "2023-01-01 02:00:00",
            "total_allocations": 3341500,
            "total_frames": 10,
            "peak_memory": 1500000,
            "command_line": "fake stats",
            "pid": 123456,
            "python_allocator": "pymalloc",
            "has_native_traces": False,
        },
    }
    actual = json_dump.call_args[0][0]
    assert expected == actual
    # The "memray-" prefix is replaced by "memray-stats-" and ".json" is
    # appended to the full file name.
    assert (tmp_path / "memray-stats-test.bin.json").exists()

0 commit comments

Comments
 (0)