Skip to content

Commit 4ccc4f9

Browse files
art049claude
andcommitted
feat: add MsgPack+LZ4 output format for tracegrind
Add --output-format=csv|msgpack option. MsgPack format uses LZ4 block compression achieving ~12x compression vs CSV. New files: - tg_msgpack.c/h: MsgPack encoder (write-only) - tg_lz4.c/h: LZ4 compression wrapper with VG_() adaptations - lz4.c/h: Vendored LZ4 library (BSD-2-Clause) - docs/tracegrind-msgpack-format.md: Format specification Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 4e65d6a commit 4ccc4f9

12 files changed

Lines changed: 4540 additions & 63 deletions

File tree

tracegrind/Makefile.am

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,11 @@ pkginclude_HEADERS = tracegrind.h
1111
noinst_HEADERS = \
1212
costs.h \
1313
events.h \
14-
global.h
14+
global.h \
15+
lz4.c \
16+
lz4.h \
17+
tg_lz4.h \
18+
tg_msgpack.h
1519

1620
#----------------------------------------------------------------------------
1721
# tracegrind-<platform>
@@ -36,7 +40,9 @@ TRACEGRIND_SOURCES_COMMON = \
3640
jumps.c \
3741
main.c \
3842
sim.c \
39-
threads.c
43+
threads.c \
44+
tg_lz4.c \
45+
tg_msgpack.c
4046

4147
# We sneakily include "cg_branchpred.c" and "cg_arch.c" from cachegrind
4248
TRACEGRIND_CFLAGS_COMMON = -I$(top_srcdir)/cachegrind

tracegrind/callstack.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -250,10 +250,10 @@ void TG_(push_call_stack)(BBCC* from, UInt jmp, BBCC* to, Addr sp, Bool skip)
250250

251251
TG_(current_call_stack).sp++;
252252

253-
/* Emit CSV trace sample on function entry */
253+
/* Emit trace sample on function entry */
254254
if (!skip && TG_(current_state).collect) {
255255
fn_node* to_fn = to->cxt->fn[0];
256-
TG_(trace_emit_sample)(TG_(current_tid), "ENTER", to_fn);
256+
TG_(trace_emit_sample)(TG_(current_tid), True, to_fn);
257257
}
258258

259259
/* To allow for above assertion we set context of next frame to 0 */
@@ -359,9 +359,9 @@ void TG_(pop_call_stack)(void)
359359
}
360360
TG_(stat).ret_counter++;
361361

362-
/* Emit CSV trace sample on function exit */
362+
/* Emit trace sample on function exit */
363363
if (TG_(current_state).collect) {
364-
TG_(trace_emit_sample)(TG_(current_tid), "EXIT", to_fn);
364+
TG_(trace_emit_sample)(TG_(current_tid), False, to_fn);
365365
}
366366

367367
/* restore context */

tracegrind/clo.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,11 @@ Bool TG_(process_cmd_line_option)(const HChar* arg)
518518

519519
else if VG_STR_CLO(arg, "--tracegrind-out-file", TG_(clo).out_format) {}
520520

521+
else if VG_XACT_CLO(arg, "--output-format=csv",
522+
TG_(clo).output_format, output_format_csv) {}
523+
else if VG_XACT_CLO(arg, "--output-format=msgpack",
524+
TG_(clo).output_format, output_format_msgpack) {}
525+
521526
else if VG_BOOL_CLO(arg, "--mangle-names", TG_(clo).mangle_names) {}
522527

523528
else if VG_BOOL_CLO(arg, "--skip-direct-rec",
@@ -573,6 +578,7 @@ void TG_(print_usage)(void)
573578
VG_(printf)(
574579
"\n dump creation options:\n"
575580
" --tracegrind-out-file=<f> Output file name [tracegrind.out.%%p]\n"
581+
" --output-format=csv|msgpack Output format [csv]\n"
576582
" --dump-line=no|yes Dump source lines of costs? [yes]\n"
577583
" --dump-instr=no|yes Dump instruction address of costs? [no]\n"
578584
" --compress-strings=no|yes Compress strings in profile dump? [yes]\n"
@@ -697,4 +703,6 @@ void TG_(set_clo_defaults)(void)
697703
TG_(clo).verbose = 0;
698704
TG_(clo).verbose_start = 0;
699705
#endif
706+
707+
TG_(clo).output_format = output_format_csv;
700708
}
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
# Tracegrind MsgPack+LZ4 Output Format
2+
3+
## Overview
4+
5+
Tracegrind's `--output-format=msgpack` produces a binary trace file combining MsgPack serialization with LZ4 block compression. Files use the `.msgpack.lz4` extension.
6+
7+
## File Structure
8+
9+
```
10+
┌─────────────────────────────────┐
11+
│ File Header (8 bytes) │
12+
├─────────────────────────────────┤
13+
│ Schema Chunk │
14+
├─────────────────────────────────┤
15+
│ Data Chunk 1..N │
16+
├─────────────────────────────────┤
17+
│ End Marker (8 bytes) │
18+
└─────────────────────────────────┘
19+
```
20+
21+
## File Header
22+
23+
| Offset | Size | Field | Description |
24+
|--------|------|---------|-------------|
25+
| 0 | 4 | magic | ASCII `TGMP` (0x54 0x47 0x4D 0x50) |
26+
| 4 | 4 | version | Format version, uint32 LE (currently 1) |
27+
28+
## Chunk Format
29+
30+
Each chunk (schema and data) has the same header:
31+
32+
| Offset | Size | Field | Description |
33+
|--------|------|-------------------|-------------|
34+
| 0 | 4 | uncompressed_size | Size after decompression, uint32 LE |
35+
| 4 | 4 | compressed_size | Size of LZ4 block, uint32 LE |
36+
| 8 | N | data | LZ4 block-compressed MsgPack data |
37+
38+
## Schema Chunk
39+
40+
The first chunk contains a MsgPack map:
41+
42+
```json
43+
{
44+
"version": 1,
45+
"format": "tracegrind-msgpack",
46+
"columns": ["seq", "tid", "event", "fn", "obj", "file", "line", "Ir", ...]
47+
}
48+
```
49+
50+
### Fixed Columns
51+
52+
| Index | Name | Type | Description |
53+
|-------|-------|--------|-------------|
54+
| 0 | seq | uint64 | Sequence number |
55+
| 1 | tid | int32 | Thread ID |
56+
| 2 | event | int | 0 = ENTER, 1 = EXIT |
57+
| 3 | fn | string | Function name |
58+
| 4 | obj | string | Shared object path |
59+
| 5 | file | string | Source file path |
60+
| 6 | line | int32 | Line number (0 if unknown) |
61+
62+
### Event Columns (index 7+)
63+
64+
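The remaining columns are event counters, stored as delta values: `Ir`, `Dr`, `Dw`, `I1mr`, `D1mr`, `D1mw`, `ILmr`, `DLmr`, `DLmw`, `Bc`, `Bcm`, `Bi`, `Bim`. Which of these columns are present depends on Tracegrind options; the `columns` list in the schema chunk gives the exact set and order for a given file.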
65+
66+
## Data Chunks
67+
68+
Each data chunk contains concatenated MsgPack arrays (one per row):
69+
70+
```
71+
[seq, tid, event, fn, obj, file, line, delta_Ir, ...]
72+
```
73+
74+
The reference implementation writes 4096 rows per chunk.
75+
76+
## End Marker
77+
78+
8 zero bytes (uncompressed_size = 0, compressed_size = 0).
79+
80+
## Example: Reading in Python
81+
82+
```python
83+
import struct, lz4.block, msgpack
84+
85+
def read_tracegrind(filepath):
86+
with open(filepath, 'rb') as f:
87+
assert f.read(4) == b'TGMP'
88+
version = struct.unpack('<I', f.read(4))[0]
89+
90+
# Read schema chunk
91+
usize, csize = struct.unpack('<II', f.read(8))
92+
schema = msgpack.unpackb(
93+
lz4.block.decompress(f.read(csize), uncompressed_size=usize), raw=False)
94+
columns = [c.decode() if isinstance(c, bytes) else c
95+
for c in schema['columns']]
96+
97+
# Read data chunks
98+
rows = []
99+
while True:
100+
usize, csize = struct.unpack('<II', f.read(8))
101+
if usize == 0 and csize == 0:
102+
break
103+
chunk = lz4.block.decompress(f.read(csize), uncompressed_size=usize)
104+
unpacker = msgpack.Unpacker(raw=False)
105+
unpacker.feed(chunk)
106+
for row in unpacker:
107+
rows.append(dict(zip(columns, row)))
108+
109+
return columns, rows
110+
```
111+
112+
## References
113+
114+
- [MsgPack Specification](https://github.com/msgpack/msgpack/blob/master/spec.md)
115+
- [LZ4 Block Format](https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md)
116+
117+
## Reference Implementation
118+
119+
- `tracegrind/tg_msgpack.c/h` - MsgPack encoder
120+
- `tracegrind/tg_lz4.c/h` - LZ4 compression wrapper
121+
- `tracegrind/lz4.c/h` - Vendored LZ4 library
122+
- `tracegrind/dump.c` - Trace output integration

0 commit comments

Comments
 (0)