Skip to content

Commit 8d5c687

Browse files
committed
don't be too smart. bye array
1 parent a23814b commit 8d5c687

1 file changed

Lines changed: 41 additions & 62 deletions

File tree

Lib/profiling/sampling/gecko_collector.py

Lines changed: 41 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import array
21
import itertools
32
import io
43
import json
@@ -68,34 +67,7 @@
6867
DEFAULT_SPILL_BUFFER_BYTES = 128 * 1024
6968

7069

71-
class TypedSpillColumn:
72-
def __init__(self, directory, basename, typecode, *,
73-
buffer_bytes=DEFAULT_SPILL_BUFFER_BYTES):
74-
self.path = os.path.join(directory, basename)
75-
self.buffer = array.array(typecode)
76-
self.max_items = max(1, buffer_bytes // self.buffer.itemsize)
77-
78-
def append(self, value):
79-
self.buffer.append(value)
80-
if len(self.buffer) >= self.max_items:
81-
self.flush()
82-
83-
def flush(self):
84-
with open(self.path, "ab") as file:
85-
self.buffer.tofile(file)
86-
self.buffer.clear()
87-
88-
def iter_chunks(self):
89-
typecode = self.buffer.typecode
90-
block_bytes = self.max_items * self.buffer.itemsize
91-
with open(self.path, "rb") as file:
92-
for block in iter(lambda: file.read(block_bytes), b""):
93-
chunk = array.array(typecode)
94-
chunk.frombytes(block)
95-
yield chunk
96-
97-
98-
class NDJSONSpillColumn:
70+
class SpillColumn:
9971
_encoder = json.JSONEncoder(separators=(",", ":"))
10072

10173
def __init__(self, directory, basename, *,
@@ -104,8 +76,8 @@ def __init__(self, directory, basename, *,
10476
self.buffer = bytearray()
10577
self._buffer_bytes = buffer_bytes
10678

107-
def append_object(self, data):
108-
self.buffer += (self._encoder.encode(data) + "\n").encode()
79+
def append(self, value):
80+
self.buffer += (self._encoder.encode(value) + "\n").encode()
10981
if len(self.buffer) >= self._buffer_bytes:
11082
self.flush()
11183

@@ -114,30 +86,28 @@ def flush(self):
11486
file.write(self.buffer)
11587
self.buffer.clear()
11688

117-
def iter_lines(self):
89+
def iter_tokens(self):
11890
with open(self.path) as file:
11991
for line in file:
12092
yield line.rstrip("\n")
12193

12294

12395
class GeckoThreadSpill:
124-
_TYPED_COLUMNS = (
125-
("samples_stack", "samples-stack.bin", "q"),
126-
("samples_time", "samples-time.bin", "d"),
127-
("markers_name", "markers-name.bin", "q"),
128-
("markers_start_time", "markers-start-time.bin", "d"),
129-
("markers_end_time", "markers-end-time.bin", "d"),
130-
("markers_phase", "markers-phase.bin", "B"),
131-
("markers_category", "markers-category.bin", "I"),
96+
_COLUMNS = (
97+
("samples_stack", "samples-stack.json"),
98+
("samples_time", "samples-time.json"),
99+
("markers_name", "markers-name.json"),
100+
("markers_start_time", "markers-start-time.json"),
101+
("markers_end_time", "markers-end-time.json"),
102+
("markers_phase", "markers-phase.json"),
103+
("markers_category", "markers-category.json"),
104+
("markers_data", "markers-data.json"),
132105
)
133106

134107
def __init__(self, directory, tid):
135108
prefix = f"thread-{tid}-"
136-
for attr, basename, typecode in self._TYPED_COLUMNS:
137-
setattr(self, attr, TypedSpillColumn(
138-
directory, prefix + basename, typecode))
139-
self.markers_data = NDJSONSpillColumn(
140-
directory, prefix + "markers-data.ndjson")
109+
for attr, basename in self._COLUMNS:
110+
setattr(self, attr, SpillColumn(directory, prefix + basename))
141111
self.sample_count = 0
142112
self.marker_count = 0
143113

@@ -152,13 +122,12 @@ def append_marker(self, name_idx, start_time, end_time, phase, category, data):
152122
self.markers_end_time.append(end_time)
153123
self.markers_phase.append(phase)
154124
self.markers_category.append(category)
155-
self.markers_data.append_object(data)
125+
self.markers_data.append(data)
156126
self.marker_count += 1
157127

158128
def prepare_read(self):
159-
for attr, _basename, _typecode in self._TYPED_COLUMNS:
129+
for attr, _basename in self._COLUMNS:
160130
getattr(self, attr).flush()
161-
self.markers_data.flush()
162131

163132

164133
class GeckoCollector(Collector):
@@ -928,9 +897,13 @@ def _stream_thread(self, file, tid, thread_data):
928897

929898
def _stream_samples(self, file, spill):
930899
file.write('{"stack":')
931-
_stream_array(file, _tokens(spill.samples_stack), spill.sample_count)
900+
_stream_array(
901+
file, spill.samples_stack.iter_tokens(), spill.sample_count
902+
)
932903
file.write(',"time":')
933-
_stream_array(file, _tokens(spill.samples_time), spill.sample_count)
904+
_stream_array(
905+
file, spill.samples_time.iter_tokens(), spill.sample_count
906+
)
934907
file.write(',"eventDelay":')
935908
_stream_array(
936909
file,
@@ -943,17 +916,29 @@ def _stream_samples(self, file, spill):
943916

944917
def _stream_markers(self, file, spill):
945918
file.write('{"data":')
946-
_stream_array(file, spill.markers_data.iter_lines(), spill.marker_count)
919+
_stream_array(
920+
file, spill.markers_data.iter_tokens(), spill.marker_count
921+
)
947922
file.write(',"name":')
948-
_stream_array(file, _tokens(spill.markers_name), spill.marker_count)
923+
_stream_array(
924+
file, spill.markers_name.iter_tokens(), spill.marker_count
925+
)
949926
file.write(',"startTime":')
950-
_stream_array(file, _tokens(spill.markers_start_time), spill.marker_count)
927+
_stream_array(
928+
file, spill.markers_start_time.iter_tokens(), spill.marker_count
929+
)
951930
file.write(',"endTime":')
952-
_stream_array(file, _tokens(spill.markers_end_time), spill.marker_count)
931+
_stream_array(
932+
file, spill.markers_end_time.iter_tokens(), spill.marker_count
933+
)
953934
file.write(',"phase":')
954-
_stream_array(file, _tokens(spill.markers_phase), spill.marker_count)
935+
_stream_array(
936+
file, spill.markers_phase.iter_tokens(), spill.marker_count
937+
)
955938
file.write(',"category":')
956-
_stream_array(file, _tokens(spill.markers_category), spill.marker_count)
939+
_stream_array(
940+
file, spill.markers_category.iter_tokens(), spill.marker_count
941+
)
957942
file.write(',"length":')
958943
file.write(repr(spill.marker_count))
959944
file.write("}")
@@ -972,9 +957,3 @@ def _stream_array(file, token_iter, expected_count):
972957
f"streamed {count} array items, expected {expected_count}"
973958
)
974959
file.write("]")
975-
976-
977-
def _tokens(column):
978-
for chunk in column.iter_chunks():
979-
for value in chunk:
980-
yield repr(value)

0 commit comments

Comments
 (0)