Skip to content

Commit 03458aa

Browse files
authored
profiler: Fill in the profiler state functions (#1226)
* profiler: Fill in the profiler state functions This patch is a follow-up to #1213 It fills in the profiler state implementation, so that it can be usable from the whamm monitor. The most notable change that is not described in the original issue, is the profile result encoding format, described in the `format.rs` module. * Review comments
1 parent e54e18b commit 03458aa

5 files changed

Lines changed: 461 additions & 48 deletions

File tree

crates/profiler-lib/src/format.rs

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
//! Binary encoding of the profile report.
2+
//!
3+
//! A report is a magic + version header followed by fixed-width,
4+
//! little-endian records, in the following format:
5+
//!
6+
//! ```text
7+
//! magic : b"JPRF" (4 bytes)
8+
//! version : u8 (== VERSION)
9+
//! records : u32 (number of records that follow)
10+
//! record[] : (16 bytes each, ordered by (func_addr, target))
11+
//! func_addr : u32
12+
//! target : u32
13+
//! count : u64
14+
//! ```
15+
16+
use anyhow::{Result, ensure};
17+
18+
const MAGIC: &[u8; 4] = b"JPRF";
19+
const VERSION: u8 = 1;
20+
const HEADER_LEN: usize = MAGIC.len() + size_of::<u8>() + size_of::<u32>();
21+
const RECORD_LEN: usize = size_of::<u32>() + size_of::<u32>() + size_of::<u64>();
22+
23+
/// A single `(func_addr, target) -> count` report entry.
24+
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
25+
pub struct Record {
26+
/// QuickJS bytecode buffer start address, identifying the JS function.
27+
pub func_addr: u32,
28+
/// The `br_table` target, i.e. the QuickJS opcode.
29+
pub target: u32,
30+
/// Total countable Wasm instructions attributed to that opcode.
31+
pub count: u64,
32+
}
33+
34+
/// Serialize `records` into the binary report format.
35+
pub fn write<I>(records: I) -> Vec<u8>
36+
where
37+
I: ExactSizeIterator<Item = Record>,
38+
{
39+
let mut out = Vec::with_capacity(HEADER_LEN + records.len() * RECORD_LEN);
40+
out.extend_from_slice(MAGIC);
41+
out.push(VERSION);
42+
out.extend_from_slice(&(records.len() as u32).to_le_bytes());
43+
for r in records {
44+
out.extend_from_slice(&r.func_addr.to_le_bytes());
45+
out.extend_from_slice(&r.target.to_le_bytes());
46+
out.extend_from_slice(&r.count.to_le_bytes());
47+
}
48+
out
49+
}
50+
51+
/// Parse a binary report, validating the magic and version.
52+
pub fn read(bytes: &[u8]) -> Result<Vec<Record>> {
53+
ensure!(bytes.len() >= HEADER_LEN, "report shorter than header");
54+
ensure!(&bytes[..MAGIC.len()] == MAGIC, "bad report magic");
55+
56+
let version = bytes[MAGIC.len()];
57+
ensure!(version == VERSION, "unsupported report version {version}");
58+
59+
let count_off = MAGIC.len() + size_of::<u8>();
60+
let count =
61+
u32::from_le_bytes(bytes[count_off..count_off + size_of::<u32>()].try_into()?) as usize;
62+
63+
let body = &bytes[HEADER_LEN..];
64+
ensure!(
65+
body.len() == count * RECORD_LEN,
66+
"report body is {} bytes, expected {} for {count} records",
67+
body.len(),
68+
count * RECORD_LEN
69+
);
70+
71+
let records = body
72+
.chunks_exact(RECORD_LEN)
73+
.map(|c| Record {
74+
func_addr: u32::from_le_bytes(c[0..4].try_into().unwrap()),
75+
target: u32::from_le_bytes(c[4..8].try_into().unwrap()),
76+
count: u64::from_le_bytes(c[8..16].try_into().unwrap()),
77+
})
78+
.collect();
79+
80+
Ok(records)
81+
}
82+
83+
#[cfg(test)]
mod tests {
    use super::*;

    /// A single record with arbitrary, distinct field values.
    fn sample() -> Record {
        Record {
            func_addr: 1,
            target: 2,
            count: 3,
        }
    }

    #[test]
    fn roundtrips() {
        let records = vec![
            Record {
                func_addr: 0x1000,
                target: 5,
                count: 3,
            },
            Record {
                func_addr: 0x1000,
                target: 7,
                count: 5,
            },
        ];
        let bytes = write(records.clone().into_iter());
        assert_eq!(read(&bytes).unwrap(), records);
    }

    #[test]
    fn empty_roundtrips() {
        let bytes = write(std::iter::empty());
        assert!(read(&bytes).unwrap().is_empty());
    }

    #[test]
    fn rejects_bad_magic() {
        let mut bytes = write(std::iter::empty());
        bytes[0] = b'X';
        assert!(read(&bytes).is_err());
    }

    #[test]
    fn rejects_bad_version() {
        let mut bytes = write(std::iter::empty());
        // The version byte sits right after the magic.
        bytes[MAGIC.len()] = VERSION.wrapping_add(1);
        assert!(read(&bytes).is_err());
    }

    #[test]
    fn rejects_short_header() {
        assert!(read(&[]).is_err());
        // The magic alone is not a complete header.
        assert!(read(MAGIC).is_err());
    }

    #[test]
    fn rejects_truncated_body() {
        let bytes = write(std::iter::once(sample()));
        assert!(read(&bytes[..bytes.len() - 1]).is_err());
    }

    #[test]
    fn rejects_trailing_bytes() {
        let mut bytes = write(std::iter::once(sample()));
        bytes.push(0);
        assert!(read(&bytes).is_err());
    }
}

crates/profiler-lib/src/interpreter.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,32 @@ pub fn is_byte_load(load: &Load) -> bool {
186186
matches!(load.kind, LoadKind::I32_8 { .. } | LoadKind::I64_8 { .. })
187187
}
188188

189+
/// Collect the byte offsets of "countable" opcodes.
190+
pub(crate) fn countable_opcodes(func: &LocalFunction) -> BTreeSet<u32> {
191+
#[derive(Default)]
192+
struct Collect {
193+
countable: BTreeSet<u32>,
194+
}
195+
impl<'instr> Visitor<'instr> for Collect {
196+
fn visit_instr(&mut self, instr: &'instr Instr, loc: &'instr InstrLocId) {
197+
let excluded = matches!(
198+
instr,
199+
Instr::Block(_)
200+
| Instr::Loop(_)
201+
| Instr::Drop(_)
202+
| Instr::Return(_)
203+
| Instr::Unreachable(_)
204+
);
205+
if !excluded {
206+
self.countable.insert(loc.data());
207+
}
208+
}
209+
}
210+
let mut v = Collect::default();
211+
dfs_in_order(&mut v, func, func.entry_block());
212+
v.countable
213+
}
214+
189215
impl<'f, 'instr> Visitor<'instr> for AbstractInterp<'f> {
190216
fn visit_instr(&mut self, _: &'instr Instr, loc: &'instr InstrLocId) {
191217
// Save the program counter before visiting each operator.

crates/profiler-lib/src/lib.rs

Lines changed: 48 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,30 @@
1+
pub mod format;
12
mod interpreter;
23
mod state;
34

4-
use state::State;
5+
use state::{Profiler, State};
6+
use std::cell::RefCell;
57
use std::io::Read;
68
use std::sync::OnceLock;
79

810
static STATE: OnceLock<State> = OnceLock::new();
911

12+
thread_local! {
13+
/// Runtime profiling state.
14+
static PROFILER: RefCell<Profiler> = RefCell::new(Profiler::new());
15+
}
16+
1017
fn state() -> &'static State {
1118
STATE
1219
.get()
1320
.expect("STATE must be initialized via `wizer.initialize`")
1421
}
1522

23+
/// Run `f` with mutable access to the runtime [`Profiler`].
24+
fn with_profiler<R>(f: impl FnOnce(&mut Profiler) -> R) -> R {
25+
PROFILER.with_borrow_mut(f)
26+
}
27+
1628
/// Use Wizer to pre-initialize the user library module that will be
1729
/// passed to Whamm. Note that this is temporary workaround until
1830
/// Whamm grows such capability (see
@@ -63,32 +75,55 @@ pub extern "C" fn is_dispatch_load(fid: u32, pc: u32) -> bool {
6375
/// Start a new JS function frame.
6476
#[unsafe(no_mangle)]
6577
pub extern "C" fn start_func() {
66-
todo!()
78+
with_profiler(|p| p.start_func());
6779
}
6880

69-
/// Exit the top most JS function frame.
81+
/// Pop the topmost JS function frame. The outermost activation closes
82+
/// out the final opcode with the instruction count at this point.
7083
#[unsafe(no_mangle)]
71-
pub extern "C" fn exit_func() {
72-
todo!()
84+
pub extern "C" fn exit_func(instruction_count: i64) {
85+
with_profiler(|p| p.exit_func(instruction_count as u64));
7386
}
7487

75-
/// Set current dispatch function target. i.e., the `br_table` target.
88+
/// Switch to the dispatch `br_table` target, closing out the previous
89+
/// opcode with the instruction count at this point.
7690
#[unsafe(no_mangle)]
77-
pub extern "C" fn set_dispatch_target(_target: u32) {
78-
todo!()
91+
pub extern "C" fn set_dispatch_target(target: u32, instruction_count: i64) {
92+
with_profiler(|p| p.set_dispatch_target(target, instruction_count as u64));
7993
}
8094

8195
/// Set the effective address (i.e., the start address) of the current
8296
/// function which uniquely identifies it.
8397
#[unsafe(no_mangle)]
84-
pub extern "C" fn set_func_addr(_addr: u32) {
85-
todo!()
98+
pub extern "C" fn set_func_addr(addr: u32) {
99+
with_profiler(|p| p.set_func_addr(addr));
100+
}
101+
102+
/// Whether the opcode at offset `pc` in function `fid` is counted by
103+
/// the profiler.
104+
#[unsafe(no_mangle)]
105+
pub extern "C" fn is_countable_opcode(fid: u32, pc: u32) -> bool {
106+
state().is_countable_opcode(fid, pc)
107+
}
108+
109+
/// Flush the profiler results into a buffer.
110+
#[unsafe(no_mangle)]
111+
pub extern "C" fn report() {
112+
with_profiler(|p| p.report());
113+
}
114+
115+
// TODO: Validate that `report` must run exactly once per profile
116+
// invocation.
117+
/// Linear-memory offset of the serialized report buffer.
118+
#[unsafe(no_mangle)]
119+
pub extern "C" fn report_ptr() -> u32 {
120+
with_profiler(|p| p.report_bytes().as_ptr() as u32)
86121
}
87122

88-
/// Handle the execution of the given opcode.
123+
/// Length in bytes of the serialized report buffer.
89124
#[unsafe(no_mangle)]
90-
pub extern "C" fn handle_opcode(_pc: u32) {
91-
todo!()
125+
pub extern "C" fn report_len() -> u32 {
126+
with_profiler(|p| p.report_bytes().len() as u32)
92127
}
93128

94129
#[cfg(test)]

crates/profiler-lib/src/monitor.mm

Lines changed: 23 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,46 @@
11
use {{LIBRARY_NAME}};
22

3-
// Ensure that we only read the first address of the function
4-
// bytecode, which serves as the function identifier.
5-
var expect_first_load: bool;
3+
// Global count of executed countable Wasm instructions, across every
4+
// function.
5+
var instruction_count: i64;
66

7-
// True only while executing a handler body, that is the window
8-
// between a dispatch `br_table` and the next dispatch load. Provides
9-
// more accurate counting of Wasm-instructions-per-JS-opcode.
10-
var counting: bool;
7+
// Set to true on the dispatch function entry to capture the function's
8+
// address from its first dispatch load.
9+
var expect_first_load: bool;
1110

1211
wasm:func:entry / @static {{LIBRARY_NAME}}.is_dispatch_func(fid as i32) as bool / {
13-
// When entering the dispatch function, track a new JS function frame.
12+
// Push a new JS function frame.
1413
{{LIBRARY_NAME}}.start_func();
15-
// When entering the dispatch function, we expect the first load later on.
1614
expect_first_load = true;
1715
}
1816

1917
wasm:func:exit / @static {{LIBRARY_NAME}}.is_dispatch_func(fid as i32) as bool / {
20-
// When exiting the dispatch function, pop the current JS function frame.
21-
{{LIBRARY_NAME}}.exit_func();
18+
// Pop the topmost JS function frame. The outermost activation closes
19+
// out the final opcode with the current instruction count.
20+
{{LIBRARY_NAME}}.exit_func(instruction_count);
2221
}
2322

23+
// Switching the dispatch target closes out the opcode that just ran
24+
// and begins the new one.
25+
// Declared before the increment probe so this `br_table` counts toward
26+
// the JS opcode it dispatches to, not the previous one.
2427
wasm:opcode:br_table:before / @static {{LIBRARY_NAME}}.is_dispatch_func(fid as i32) as bool / {
25-
// We are in the dispatch loop, so we track the current dispatch target.
26-
{{LIBRARY_NAME}}.set_dispatch_target(target as i32);
27-
// Entering a handler body for `target`, so we begin counting.
28-
counting = true;
28+
{{LIBRARY_NAME}}.set_dispatch_target(target as i32, instruction_count);
2929
}
3030

31-
wasm:opcode:*load*:before / @static {{LIBRARY_NAME}}.is_dispatch_load(fid as i32, pc as i32) as bool / {
32-
// When hitting the dispatch load, stop counting, the current and
33-
// following instructions do not belong to any JS opcode handler.
34-
counting = false;
31+
// Increment the instruction count, iff the opcode is countable.
32+
wasm:opcode:*:before / @static {{LIBRARY_NAME}}.is_countable_opcode(fid as i32, pc as i32) as bool / {
33+
instruction_count = instruction_count + 1;
3534
}
3635

3736
wasm:opcode:*load*:before / expect_first_load && @static {{LIBRARY_NAME}}.is_dispatch_load(fid as i32, pc as i32) as bool / {
38-
// Set the current function identifier, which is the effective address of the dispatch load.
39-
// We no longer care about `expect_first_load`, this probe should
40-
// only fire once per JS function invocation.
37+
// First load of the frame: its effective address identifies the JS
38+
// function.
4139
{{LIBRARY_NAME}}.set_func_addr(effective_addr as i32);
4240
expect_first_load = false;
4341
}
4442

45-
wasm:opcode:*:before / @static {{LIBRARY_NAME}}.is_dispatch_func(fid as i32) as bool
46-
&& counting
47-
&& opname != "nop"
48-
&& opname != "drop"
49-
&& opname != "block"
50-
&& opname != "loop"
51-
&& opname != "unreachable"
52-
&& opname != "return"
53-
&& opname != "else"
54-
&& opname != "end" / {
55-
56-
{{LIBRARY_NAME}}.handle_opcode(pc as i32);
43+
wasm:func:exit / fname == "_start" / {
44+
// At program exit emit the single, whole-execution report.
45+
{{LIBRARY_NAME}}.report();
5746
}

0 commit comments

Comments
 (0)