Skip to content

Commit fb4a7c8

Browse files
committed
fixup! fixup! claim credit!
1 parent 8e1d83b commit fb4a7c8

5 files changed

Lines changed: 73 additions & 70 deletions

File tree

Lib/profiling/sampling/constants.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,14 @@
1111
PROFILING_MODE_ALL = 3 # Combines GIL + CPU checks
1212
PROFILING_MODE_EXCEPTION = 4 # Only samples when thread has an active exception
1313

14+
PROFILING_MODE_NAMES = {
15+
PROFILING_MODE_WALL: "wall",
16+
PROFILING_MODE_CPU: "cpu",
17+
PROFILING_MODE_GIL: "gil",
18+
PROFILING_MODE_ALL: "all",
19+
PROFILING_MODE_EXCEPTION: "exception",
20+
}
21+
1422
# Sort mode constants
1523
SORT_MODE_NSAMPLES = 0
1624
SORT_MODE_TOTTIME = 1

Lib/profiling/sampling/jsonl_collector.py

Lines changed: 38 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -15,34 +15,36 @@
1515
1616
1. ``meta`` (exactly one, first line)::
1717
18-
{"type":"meta","v":1,"run_id":"<hex>",
18+
{"type":"meta","v":0,"run_id":"<hex>",
1919
"sample_interval_usec":<int>,"mode":"wall|cpu|gil|all|exception"}
2020
2121
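``mode`` is omitted when not provided. A mode value that has no entry in ``PROFILING_MODE_NAMES`` is written as ``str(mode)`` rather than one of the names above.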
2222
23-
2. ``str_def`` (zero or more)::
23+
2. ``string_table`` (zero or more)::
2424
25-
{"type":"str_def","v":1,"run_id":"<hex>",
26-
"defs":[{"str_id":<int>,"value":"<str>"}, ...]}
25+
{"type":"string_table","v":0,"run_id":"<hex>",
26+
"strings":[{"str_id":<int>,"value":"<str>"}, ...]}
2727
2828
Strings (filenames, function names) are interned to keep repeated values
29-
compact. Each chunk holds up to ``_CHUNK_SIZE`` entries.
29+
compact. IDs are zero-based. Each chunk holds up to ``_CHUNK_SIZE``
30+
entries, and each entry carries its explicit ``str_id`` so consumers do
31+
not need to infer offsets across chunks.
3032
31-
3. ``frame_def`` (zero or more)::
33+
3. ``frame_table`` (zero or more)::
3234
33-
{"type":"frame_def","v":1,"run_id":"<hex>",
34-
"defs":[{"frame_id":<int>,"path_str_id":<int>,"func_str_id":<int>,
35-
"line":<int>,"end_line":<int>,"col":<int>,"end_col":<int>,
36-
"synthetic":true}, ...]}
35+
{"type":"frame_table","v":0,"run_id":"<hex>",
36+
"frames":[{"frame_id":<int>,"path_str_id":<int>,"func_str_id":<int>,
37+
"line":<int>,"end_line":<int>,"col":<int>,
38+
"end_col":<int>}, ...]}
3739
3840
``end_line``/``col``/``end_col`` are *omitted* when source location data
3941
is unavailable (a missing key means "not available", not zero or null).
40-
``synthetic`` is present only on synthetic frames (for example, internal
41-
marker frames whose source location is None) and absent otherwise.
42+
``line`` is ``0`` for synthetic frames (for example, internal marker
43+
frames whose source location is None). Frame IDs are zero-based.
4244
4345
4. ``agg`` (zero or more)::
4446
45-
{"type":"agg","v":1,"run_id":"<hex>","kind":"frame","scope":"final",
47+
{"type":"agg","v":0,"run_id":"<hex>","kind":"frame","scope":"final",
4648
"samples_total":<int>,
4749
"entries":[{"frame_id":<int>,"self":<int>,"cumulative":<int>}, ...]}
4850
@@ -54,7 +56,7 @@
5456
5557
5. ``end`` (exactly one, last line)::
5658
57-
{"type":"end","v":1,"run_id":"<hex>","samples_total":<int>}
59+
{"type":"end","v":0,"run_id":"<hex>","samples_total":<int>}
5860
5961
Presence of ``end`` is the consumer's signal that the file is complete.
6062
@@ -71,26 +73,13 @@
7173
import uuid
7274
from itertools import batched
7375

74-
from .constants import (
75-
PROFILING_MODE_ALL,
76-
PROFILING_MODE_CPU,
77-
PROFILING_MODE_EXCEPTION,
78-
PROFILING_MODE_GIL,
79-
PROFILING_MODE_WALL,
80-
)
76+
from .constants import PROFILING_MODE_NAMES
8177
from .collector import normalize_location
8278
from .stack_collector import StackTraceCollector
8379

8480

8581
_CHUNK_SIZE = 256
86-
87-
_MODE_NAMES = {
88-
PROFILING_MODE_WALL: "wall",
89-
PROFILING_MODE_CPU: "cpu",
90-
PROFILING_MODE_GIL: "gil",
91-
PROFILING_MODE_ALL: "all",
92-
PROFILING_MODE_EXCEPTION: "exception",
93-
}
82+
_SCHEMA_VERSION = 0
9483

9584

9685
class JsonlCollector(StackTraceCollector):
@@ -143,21 +132,29 @@ def export(self, filename):
143132
self._write_message(output, self._build_meta_record())
144133
self._write_chunked_records(
145134
output,
146-
{"type": "str_def", "v": 1, "run_id": self.run_id},
147-
"defs",
135+
{
136+
"type": "string_table",
137+
"v": _SCHEMA_VERSION,
138+
"run_id": self.run_id,
139+
},
140+
"strings",
148141
self._strings,
149142
)
150143
self._write_chunked_records(
151144
output,
152-
{"type": "frame_def", "v": 1, "run_id": self.run_id},
153-
"defs",
145+
{
146+
"type": "frame_table",
147+
"v": _SCHEMA_VERSION,
148+
"run_id": self.run_id,
149+
},
150+
"frames",
154151
self._frames,
155152
)
156153
self._write_chunked_records(
157154
output,
158155
{
159156
"type": "agg",
160-
"v": 1,
157+
"v": _SCHEMA_VERSION,
161158
"run_id": self.run_id,
162159
"kind": "frame",
163160
"scope": "final",
@@ -171,20 +168,22 @@ def export(self, filename):
171168
def _build_meta_record(self):
172169
record = {
173170
"type": "meta",
174-
"v": 1,
171+
"v": _SCHEMA_VERSION,
175172
"run_id": self.run_id,
176173
"sample_interval_usec": self.sample_interval_usec,
177174
}
178175

179176
if self._mode is not None:
180-
record["mode"] = _MODE_NAMES.get(self._mode, str(self._mode))
177+
record["mode"] = PROFILING_MODE_NAMES.get(
178+
self._mode, str(self._mode)
179+
)
181180

182181
return record
183182

184183
def _build_end_record(self):
185184
record = {
186185
"type": "end",
187-
"v": 1,
186+
"v": _SCHEMA_VERSION,
188187
"run_id": self.run_id,
189188
"samples_total": self._samples_total,
190189
}
@@ -201,7 +200,6 @@ def _iter_final_agg_entries(self):
201200
}
202201

203202
def _get_or_create_frame_id(self, filename, location, funcname):
204-
synthetic = location is None
205203
location_fields = self._location_to_export_fields(location)
206204
func_str_id = self._intern_string(funcname)
207205
path_str_id = self._intern_string(filename)
@@ -213,21 +211,18 @@ def _get_or_create_frame_id(self, filename, location, funcname):
213211
location_fields.get("end_line"),
214212
location_fields.get("col"),
215213
location_fields.get("end_col"),
216-
synthetic,
217214
)
218215

219216
if (frame_id := self._frame_to_id.get(frame_key)) is not None:
220217
return frame_id
221218

222-
frame_id = len(self._frames) + 1
219+
frame_id = len(self._frames)
223220
frame_record = {
224221
"frame_id": frame_id,
225222
"path_str_id": path_str_id,
226223
"func_str_id": func_str_id,
227224
**location_fields,
228225
}
229-
if synthetic:
230-
frame_record["synthetic"] = True
231226

232227
self._frame_to_id[frame_key] = frame_id
233228
self._frames.append(frame_record)
@@ -239,7 +234,7 @@ def _intern_string(self, value):
239234
if (string_id := self._string_to_id.get(value)) is not None:
240235
return string_id
241236

242-
string_id = len(self._strings) + 1
237+
string_id = len(self._strings)
243238
self._string_to_id[value] = string_id
244239
self._strings.append({"str_id": string_id, "value": value})
245240
return string_id

Lib/test/test_profiling/test_sampling_profiler/helpers.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -190,13 +190,13 @@ def jsonl_tables(records):
190190
str_defs = {
191191
item["str_id"]: item["value"]
192192
for record in records
193-
if record["type"] == "str_def"
194-
for item in record["defs"]
193+
if record["type"] == "string_table"
194+
for item in record["strings"]
195195
}
196196
frame_defs = [
197197
item
198198
for record in records
199-
if record["type"] == "frame_def"
200-
for item in record["defs"]
199+
if record["type"] == "frame_table"
200+
for item in record["frames"]
201201
]
202202
return meta, str_defs, frame_defs, agg, end

Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1260,7 +1260,7 @@ def test_binary_replay_to_jsonl_rle_weight_propagation(self):
12601260

12611261
self.assertEqual(end["samples_total"], 50)
12621262
self.assertEqual(agg["entries"], [
1263-
{"frame_id": 1, "self": 50, "cumulative": 50},
1263+
{"frame_id": 0, "self": 50, "cumulative": 50},
12641264
])
12651265

12661266
def test_binary_replay_to_jsonl_omits_unavailable_columns(self):

Lib/test/test_profiling/test_sampling_profiler/test_collectors.py

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1728,11 +1728,11 @@ def test_jsonl_collector_export_exact_output(self):
17281728
self.assertEqual(
17291729
content,
17301730
(
1731-
'{"type":"meta","v":1,"run_id":"run-123","sample_interval_usec":1000}\n'
1732-
'{"type":"str_def","v":1,"run_id":"run-123","defs":[{"str_id":1,"value":"func1"},{"str_id":2,"value":"file.py"},{"str_id":3,"value":"func2"},{"str_id":4,"value":"other_func"},{"str_id":5,"value":"other.py"}]}\n'
1733-
'{"type":"frame_def","v":1,"run_id":"run-123","defs":[{"frame_id":1,"path_str_id":2,"func_str_id":1,"line":10,"end_line":10},{"frame_id":2,"path_str_id":2,"func_str_id":3,"line":20,"end_line":20},{"frame_id":3,"path_str_id":5,"func_str_id":4,"line":5,"end_line":5}]}\n'
1734-
'{"type":"agg","v":1,"run_id":"run-123","kind":"frame","scope":"final","samples_total":3,"entries":[{"frame_id":1,"self":2,"cumulative":2},{"frame_id":2,"self":0,"cumulative":2},{"frame_id":3,"self":1,"cumulative":1}]}\n'
1735-
'{"type":"end","v":1,"run_id":"run-123","samples_total":3}\n'
1731+
'{"type":"meta","v":0,"run_id":"run-123","sample_interval_usec":1000}\n'
1732+
'{"type":"string_table","v":0,"run_id":"run-123","strings":[{"str_id":0,"value":"func1"},{"str_id":1,"value":"file.py"},{"str_id":2,"value":"func2"},{"str_id":3,"value":"other_func"},{"str_id":4,"value":"other.py"}]}\n'
1733+
'{"type":"frame_table","v":0,"run_id":"run-123","frames":[{"frame_id":0,"path_str_id":1,"func_str_id":0,"line":10,"end_line":10},{"frame_id":1,"path_str_id":1,"func_str_id":2,"line":20,"end_line":20},{"frame_id":2,"path_str_id":4,"func_str_id":3,"line":5,"end_line":5}]}\n'
1734+
'{"type":"agg","v":0,"run_id":"run-123","kind":"frame","scope":"final","samples_total":3,"entries":[{"frame_id":0,"self":2,"cumulative":2},{"frame_id":1,"self":0,"cumulative":2},{"frame_id":2,"self":1,"cumulative":1}]}\n'
1735+
'{"type":"end","v":0,"run_id":"run-123","samples_total":3}\n'
17361736
),
17371737
)
17381738

@@ -1919,18 +1919,19 @@ def test_jsonl_collector_splits_large_exports_into_chunks(self):
19191919
records
19201920
)
19211921
str_chunks = [
1922-
record for record in records if record["type"] == "str_def"
1922+
record for record in records if record["type"] == "string_table"
19231923
]
19241924
frame_chunks = [
1925-
record for record in records if record["type"] == "frame_def"
1925+
record for record in records if record["type"] == "frame_table"
19261926
]
19271927
agg_chunks = [record for record in records if record["type"] == "agg"]
19281928

19291929
self.assertEqual(
1930-
[len(record["defs"]) for record in str_chunks], [256, 256, 2]
1930+
[len(record["strings"]) for record in str_chunks],
1931+
[256, 256, 2],
19311932
)
19321933
self.assertEqual(
1933-
[len(record["defs"]) for record in frame_chunks], [256, 1]
1934+
[len(record["frames"]) for record in frame_chunks], [256, 1]
19341935
)
19351936
self.assertEqual(
19361937
[len(record["entries"]) for record in agg_chunks], [256, 1]
@@ -1960,11 +1961,11 @@ def test_jsonl_collector_respects_weight_for_rle_batched_samples(self):
19601961
self.assertEqual(agg["samples_total"], 5)
19611962
self.assertEqual(
19621963
{str_defs[fd["func_str_id"]]: fd["frame_id"] for fd in frame_defs},
1963-
{"leaf": 1, "non_leaf": 2},
1964+
{"leaf": 0, "non_leaf": 1},
19641965
)
19651966
self.assertEqual(agg["entries"], [
1966-
{"frame_id": 1, "self": 5, "cumulative": 5},
1967-
{"frame_id": 2, "self": 0, "cumulative": 5},
1967+
{"frame_id": 0, "self": 5, "cumulative": 5},
1968+
{"frame_id": 1, "self": 0, "cumulative": 5},
19681969
])
19691970

19701971
def test_jsonl_collector_recursion_with_weight(self):
@@ -1984,7 +1985,7 @@ def test_jsonl_collector_recursion_with_weight(self):
19841985
_, _, frame_defs, agg, _ = jsonl_tables(records)
19851986
self.assertEqual(len(frame_defs), 1)
19861987
self.assertEqual(agg["entries"], [
1987-
{"frame_id": 1, "self": 3, "cumulative": 3},
1988+
{"frame_id": 0, "self": 3, "cumulative": 3},
19881989
])
19891990

19901991
def test_jsonl_collector_emits_col_and_end_col_when_present(self):
@@ -2009,22 +2010,22 @@ def test_jsonl_collector_emits_col_and_end_col_when_present(self):
20092010
_, str_defs, frame_defs, _, _ = jsonl_tables(records)
20102011
self.assertEqual(frame_defs, [
20112012
{
2012-
"frame_id": 1,
2013-
"path_str_id": 2,
2014-
"func_str_id": 1,
2013+
"frame_id": 0,
2014+
"path_str_id": 1,
2015+
"func_str_id": 0,
20152016
"line": 42,
20162017
"end_line": 45,
20172018
"col": 4,
20182019
"end_col": 12,
20192020
},
20202021
])
2021-
self.assertEqual(str_defs, {1: "f", 2: "test.py"})
2022+
self.assertEqual(str_defs, {0: "f", 1: "test.py"})
20222023

20232024
def test_jsonl_collector_partial_location_elision(self):
20242025
"""Negative col/end_col/end_line fields are individually elided."""
20252026
# _get_or_create_frame_id interns funcname before filename, so
2026-
# func_str_id=1 ("f") and path_str_id=2 ("test.py").
2027-
common = {"frame_id": 1, "path_str_id": 2, "func_str_id": 1}
2027+
# func_str_id=0 ("f") and path_str_id=1 ("test.py").
2028+
common = {"frame_id": 0, "path_str_id": 1, "func_str_id": 0}
20282029
cases = [
20292030
(LocationInfo(42, 45, -1, 12),
20302031
{**common, "line": 42, "end_line": 45, "end_col": 12}),
@@ -2500,7 +2501,7 @@ def test_jsonl_collector_with_location_info(self):
25002501
self.assertEqual(
25012502
frame_defs[0],
25022503
{
2503-
"frame_id": 1,
2504+
"frame_id": 0,
25042505
"path_str_id": frame_defs[0]["path_str_id"],
25052506
"func_str_id": frame_defs[0]["func_str_id"],
25062507
"line": 42,
@@ -2541,11 +2542,10 @@ def test_jsonl_collector_with_none_location(self):
25412542
self.assertEqual(
25422543
frame_defs[0],
25432544
{
2544-
"frame_id": 1,
2545+
"frame_id": 0,
25452546
"path_str_id": frame_defs[0]["path_str_id"],
25462547
"func_str_id": frame_defs[0]["func_str_id"],
25472548
"line": 0,
2548-
"synthetic": True,
25492549
},
25502550
)
25512551

0 commit comments

Comments
 (0)