Skip to content

Commit 3f223b7

Browse files
committed
Store not just the start but also the end of the load instructions in the custom section.
Load instructions are usually 3 bytes in x64 but extend to 4 if loading from R12 or RSP. We pack the distinction into an array of bits to save perhaps several K of rather hot cache.
1 parent d3a0053 commit 3f223b7

9 files changed

Lines changed: 158 additions & 56 deletions

File tree

cranelift/codegen/meta/src/shared/instructions.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -393,8 +393,9 @@ fn define_control_flow(
393393
segfault, which hands off control to a signal handler for further
394394
action. The handler has access to ``context`` (typically the
395395
``VMContext``'s ``vm_store_context``) and can use the second
396-
reserved register to store a temp value, as needed on platforms
397-
where signal handlers cannot push stack frames.
396+
reserved register to store a temp value--like the original return
397+
value--as needed on platforms where signal handlers cannot push stack
398+
frames.
398399
399400
On x64, RDI holds ``context``, and R10 is used as scratch space.
400401
"#,

cranelift/codegen/src/isa/x64/inst/emit.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -648,17 +648,15 @@ pub(crate) fn emit(
648648
Inst::DeadLoadWithContext { dst, load_ptr, .. } => {
649649
let start = sink.cur_offset();
650650

651-
// Since we're clobbering r10 anyway to store the original return
652-
// address, we also use it as a destination for the dead load rather
653-
// than sucking up another reg:
654651
let load_ptr_addr = SyntheticAmode::real(Amode::imm_reg(0, **load_ptr));
652+
// Since we're clobbering dst anyway to store the original return
653+
// address, also use it as a destination for the dead load rather
654+
// than sucking up another reg:
655655
asm::inst::movq_rm::new(*dst, load_ptr_addr).emit(sink, info, state);
656656

657-
let end = sink.cur_offset();
658-
659657
// Put the address of this instruction aside so we can later
660658
// distinguish whether a segfault is its fault.
661-
sink.add_epoch_check();
659+
sink.add_epoch_check(start, sink.cur_offset());
662660
}
663661

664662
Inst::JmpKnown { dst } => uncond_jmp(sink, *dst),

cranelift/codegen/src/machinst/buffer.rs

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ pub struct MachBuffer<I: VCodeInst> {
253253
/// Any patchable call site locations.
254254
patchable_call_sites: SmallVec<[MachPatchableCallSite; 16]>,
255255
/// Any locations which do an MMU-based check for the end of an epoch.
256-
epoch_checks: SmallVec<[EpochCheckOffset; 16]>,
256+
epoch_checks: SmallVec<[Range<CodeOffset>; 16]>,
257257
/// Any exception-handler records referred to at call sites.
258258
exception_handlers: SmallVec<[MachExceptionHandler; 16]>,
259259
/// Any source location mappings referring to this code.
@@ -384,7 +384,7 @@ pub struct MachBufferFinalized<T: CompilePhase> {
384384
/// Any patchable call site locations referring to this code.
385385
pub(crate) patchable_call_sites: SmallVec<[MachPatchableCallSite; 16]>,
386386
/// Any locations which do an MMU-based check for the end of an epoch.
387-
pub epoch_checks: SmallVec<[EpochCheckOffset; 16]>,
387+
pub epoch_checks: SmallVec<[Range<CodeOffset>; 16]>,
388388
/// Any exception-handler records referred to at call sites.
389389
pub(crate) exception_handlers: SmallVec<[FinalizedMachExceptionHandler; 16]>,
390390
/// Any source location mappings referring to this code.
@@ -1704,11 +1704,13 @@ impl<I: VCodeInst> MachBuffer<I> {
17041704
}
17051705

17061706
/// Record that an MMU-based epoch interruption check occurs at the current
1707-
/// offset. The signal handler uses these annotations to distinguish that a
1707+
/// offset. A signal handler may use these annotations to distinguish that a
17081708
/// segfault is actually an epoch interruption in disguise. The
17091709
/// DeadLoadWithContext instruction is assumed to have already been emitted.
1710-
pub fn add_epoch_check(&mut self) {
1711-
self.epoch_checks.push(self.cur_offset());
1710+
/// `start` is the offset of the emitted instruction, and `end` is the
1711+
/// offset of the immediately following instruction.
1712+
pub fn add_epoch_check(&mut self, start: CodeOffset, end: CodeOffset) {
1713+
self.epoch_checks.push(start..end);
17121714
}
17131715

17141716
/// Add an unwind record at the current offset.
@@ -2213,12 +2215,6 @@ pub struct MachPatchableCallSite {
22132215
pub len: u32,
22142216
}
22152217

2216-
/// The location of an epoch-end check, when using MMU-based epoch interruption.
2217-
///
2218-
/// Specifically, this points to the instruction after the one that does the
2219-
/// epoch-end check: the one at which to resume execution.
2220-
pub type EpochCheckOffset = CodeOffset;
2221-
22222218
/// A source-location mapping resulting from a compilation.
22232219
#[derive(PartialEq, Debug, Clone)]
22242220
#[cfg_attr(

crates/environ/src/compile/epoch_checks.rs

Lines changed: 95 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@
44
55
use crate::obj::ELF_WASMTIME_EPOCH_CHECKS;
66
use crate::prelude::*;
7+
use object::SectionKind;
78
use object::write::{Object, StandardSegment};
8-
use object::{LittleEndian, SectionKind, U32Bytes};
99
use std::ops::Range;
1010

11-
/// Offset of an epoch check within its function, in bytes. Specifically, this
12-
/// points to The instruction after the one that does the epoch-end check: the
13-
/// one at which to resume execution.
11+
/// Offset of an epoch check (in bytes) within the text section. Specifically,
12+
/// this points to the load instruction that may trigger the epoch-ending
13+
/// segfault.
1414
///
1515
/// This is parallel to cranelift's CodeOffset and exists to (1) avoid making it
1616
/// a dependency, (2) pin it down to <= 32 bits, since the format of the
@@ -21,8 +21,12 @@ type EpochCheckOffset = u32;
2121
/// epoch-end-check locations in a native binary.
2222
#[derive(Default)]
2323
pub struct EpochCheckSection {
24-
/// Offset of the instruction to resume at after the epoch end and task switch
25-
return_offsets: Vec<U32Bytes<LittleEndian>>,
24+
/// Offset of the start of the load instruction which effects the epoch
25+
/// check
26+
starts: Vec<u32>,
27+
/// Packed bits, parallel to elements of `starts`, which tell the length of
28+
/// the load instruction: 0 if 3 bytes, 1 if 4
29+
ends: Vec<u8>,
2630
/// The largest (and most recent, because we accept them only in order)
2731
/// offset received so far for enforcing ordering. This is relative to the
2832
/// start of the code (text) section so we can make sure functions are
@@ -34,20 +38,29 @@ impl EpochCheckSection {
3438
/// Adds an epoch-check location to the section.
3539
///
3640
/// Calls to this must be ordered by the location of `func`, and
37-
/// `check_offsets` must be ordered (within each function) as well.
38-
pub fn push(&mut self, func: Range<u64>, check_offsets: &[EpochCheckOffset]) {
41+
/// offsets must be ordered (within each function) as well.
42+
pub fn push(&mut self, func: Range<u64>, checks: &[Range<EpochCheckOffset>]) {
3943
// Check that functions have been pushed in order so our section is
4044
// sorted for free.
4145
let func_start = u32::try_from(func.start).unwrap();
4246
let func_end = u32::try_from(func.end).unwrap();
4347
assert!(func_start >= self.last_offset);
4448

4549
// Remember each offset, ensuring they are in order.
46-
for offset in check_offsets {
47-
let text_section_relative = func_start + offset;
50+
for check in checks {
51+
let text_section_relative = func_start + check.start;
4852
assert!(text_section_relative > self.last_offset);
49-
self.return_offsets
50-
.push(U32Bytes::new(LittleEndian, text_section_relative));
53+
let bit_number = self.starts.len();
54+
self.starts.push(text_section_relative);
55+
let load_is_long = match check.len() {
56+
3 => false,
57+
4 => true,
58+
_ => panic!(
59+
"Unexpected length of epoch-checking load instruction: {}",
60+
check.len()
61+
),
62+
};
63+
set_bit(&mut self.ends, bit_number, load_is_long);
5164
self.last_offset = text_section_relative;
5265
}
5366
self.last_offset = func_end;
@@ -61,7 +74,75 @@ impl EpochCheckSection {
6174
SectionKind::ReadOnlyData,
6275
);
6376

64-
// Append offsets.
65-
obj.append_section_data(section, object::bytes_of_slice(&self.return_offsets), 4);
77+
let num_checks: u32 = self.starts.len().try_into()
78+
.expect("there should be few enough epoch checks to be indexed by a u32, as the Wasm itself is only that long");
79+
// Number of epoch checks:
80+
obj.append_section_data(
81+
section,
82+
object::bytes_of(&num_checks),
83+
4, // For speed, avoid splitting across a cache line.
84+
);
85+
// Align to 4 so we can use from_raw_parts() when reading:
86+
obj.append_section_data(section, object::bytes_of_slice(&self.starts), 4);
87+
// We'll be querying this a byte at a time so don't care about alignment:
88+
obj.append_section_data(section, object::bytes_of_slice(&self.ends), 1);
89+
}
90+
}
91+
92+
/// Returns the offset at which to resume after the given epoch check.
93+
///
94+
/// If there is no epoch check at that offset, return None.
95+
pub fn return_offset_for_epoch_check(
96+
section: &[u8],
97+
check: EpochCheckOffset,
98+
) -> Option<EpochCheckOffset> {
99+
let (num_checks, rest) = object::from_bytes::<u32>(section)
100+
.expect(".wasmtime.epochchecks section should be long enough to contain count");
101+
let (starts, ends) = object::slice_from_bytes::<u32>(rest, *num_checks as usize)
102+
.expect(".wasmtime.epochchecks section should be long enough to contain starts");
103+
104+
starts
105+
.binary_search(&check)
106+
.ok()
107+
.map(|epoch_index| check + (if get_bit(ends, epoch_index) { 4 } else { 3 }))
108+
}
109+
110+
/// Sets bit `dest_bit_number` (0-based) in `dest_slice` to `value`.
111+
fn set_bit(dest_slice: &mut Vec<u8>, dest_bit_number: usize, value: bool) {
112+
let byte = dest_bit_number / 8;
113+
if byte >= dest_slice.len() {
114+
dest_slice.resize(byte + 1, 0);
66115
}
116+
let bit = dest_bit_number % 8;
117+
if value {
118+
dest_slice[byte] |= 1 << bit;
119+
} else {
120+
dest_slice[byte] &= !(1 << bit);
121+
}
122+
}
123+
124+
/// Returns bit `bit_number` (0-based) of `bytes`.
125+
fn get_bit(bytes: &[u8], bit_number: usize) -> bool {
126+
let byte = bit_number / 8;
127+
let bit = bit_number % 8;
128+
bytes[byte] & (1 << bit) != 0
129+
}
130+
131+
#[test]
132+
fn test_get_and_set_bit() {
133+
let mut bits = vec![];
134+
set_bit(&mut bits, 0, true); // first
135+
set_bit(&mut bits, 2, true); // middle
136+
set_bit(&mut bits, 18, true); // something beyond first byte
137+
assert_eq!(bits, vec![5, 0, 4]);
138+
assert!(get_bit(&bits, 0));
139+
assert!(get_bit(&bits, 18));
140+
assert!(!get_bit(&bits, 3));
141+
}
142+
143+
#[test]
144+
fn test_set_bit_clear() {
145+
let mut bits = vec![0xFF];
146+
set_bit(&mut bits, 1, false);
147+
assert_eq!(bits, vec![0xFD]);
67148
}

crates/environ/src/obj.rs

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -98,17 +98,27 @@ pub const ELF_WASMTIME_STACK_MAP: &str = ".wasmtime.stackmap";
9898
/// to the 32-bit encodings for offsets this doesn't support images >=4gb.
9999
pub const ELF_WASMTIME_TRAPS: &str = ".wasmtime.traps";
100100

101-
/// A custom section which contains the offsets of instructions which check for
102-
/// the end of epochs when using `--epoch-interruption-via-mmu`.
103-
///
104-
/// The contents are examined at runtime by the signal handler to determine
105-
/// whether a segfault is due to an epoch ending (vs. a legitimate crash).
106-
///
107-
/// This section is a sorted array of 32-bit unsigned little-endian integers
108-
/// which represent offsets from the beginning of the text section to the
109-
/// instruction following the load which triggers epoch-ending segfaults. TODO:
110-
/// We may point elsewhere if it's more useful to the signal handler. Be careful
111-
/// if this could end up pointing off the end of the text section.
101+
/// A custom section through which we can locate instructions which
102+
/// check for the end of epochs when using `--epoch-interruption-via-mmu`.
103+
///
104+
/// The section allows for finding both the beginnings and ends of such
105+
/// instructions so the signal handler can identify segfaults which signify the
106+
/// end of an epoch (vs. ordinary crashes) and also find the address at which to
107+
/// resume afterward.
108+
///
109+
/// The format is architecture-dependent. Because the only foreseen need to read
110+
/// it is on the platform where the binary will be executed and performance is
111+
/// sensitive, endianness follows whatever is native for the target. For x64,
112+
/// the only currently supported platform, the section comprises these data:
113+
/// * A u32 stating how many epoch checks are in each of the following 2 items
114+
/// * A sorted array of u32s which represent offsets from the beginning of the
115+
/// text section to the load instruction triggering the epoch end
116+
/// * A parallel array of bits representing the length of those load
117+
/// instructions: 0 meaning 3 bytes, 1 meaning 4. These are the only lengths
118+
/// possible on x64. Resumption should happen directly after the load
119+
/// instruction. There is no danger of this location pointing beyond the end
120+
/// of the function; even if a function is empty, there's at least a return
121+
/// (or, in case of an infinite loop, a jmp) after its prologue.
112122
///
113123
/// The 32-bit encodings herein mean >=4gb text sections are not supported.
114124
pub const ELF_WASMTIME_EPOCH_CHECKS: &str = ".wasmtime.epochchecks";

crates/environ/src/tunables.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ define_tunables! {
9090
/// Whether or not we use epoch-based interruption.
9191
pub epoch_interruption: bool,
9292

93-
/// Whether or not to use MMU tricks to speed epoch deadline checks.
93+
/// Whether or not to use MMU tricks to speed epoch-end checks.
9494
/// TODO: Consider whether this should be orthogonal to
9595
/// epoch_interruption. If not, combine them into an enum or something.
9696
pub epoch_interruption_via_mmu: bool,

crates/wasmtime/src/config.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -755,7 +755,8 @@ impl Config {
755755
self
756756
}
757757

758-
/// Stuff
758+
/// Enables a faster-than-deadlines epoch detection mechanism based on
759+
/// memory-page permissions
759760
pub fn epoch_interruption_via_mmu(&mut self, enable: bool) -> &mut Self {
760761
self.tunables.epoch_interruption_via_mmu = Some(enable);
761762
self

crates/wasmtime/src/runtime/code_memory.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use core::ops::Range;
88
use object::SectionFlags;
99
use object::endian::Endianness;
1010
use object::read::{Object, ObjectSection, elf::ElfFile64};
11-
use wasmtime_environ::{Trap, lookup_trap_code, obj};
11+
use wasmtime_environ::{Trap, lookup_trap_code, obj, return_offset_for_epoch_check};
1212
use wasmtime_unwinder::ExceptionTable;
1313

1414
/// Management of executable memory within a `MmapVec`
@@ -33,6 +33,7 @@ pub struct CodeMemory {
3333
text: Range<usize>,
3434
unwind: Range<usize>,
3535
trap_data: Range<usize>,
36+
epoch_check_data: Range<usize>,
3637
wasm_data: Range<usize>,
3738
address_map_data: Range<usize>,
3839
stack_map_data: Range<usize>,
@@ -127,6 +128,7 @@ impl CodeMemory {
127128
#[cfg(feature = "debug-builtins")]
128129
let mut has_native_debug_info = false;
129130
let mut trap_data = 0..0;
131+
let mut epoch_check_data = 0..0;
130132
let mut exception_data = 0..0;
131133
let mut frame_tables_data = 0..0;
132134
let mut wasm_data = 0..0;
@@ -178,6 +180,7 @@ impl CodeMemory {
178180
obj::ELF_WASMTIME_ADDRMAP => address_map_data = range,
179181
obj::ELF_WASMTIME_STACK_MAP => stack_map_data = range,
180182
obj::ELF_WASMTIME_TRAPS => trap_data = range,
183+
obj::ELF_WASMTIME_EPOCH_CHECKS => epoch_check_data = range,
181184
obj::ELF_WASMTIME_EXCEPTIONS => exception_data = range,
182185
obj::ELF_WASMTIME_FRAMES => frame_tables_data = range,
183186
obj::ELF_NAME_DATA => func_name_data = range,
@@ -222,6 +225,7 @@ impl CodeMemory {
222225
text,
223226
unwind,
224227
trap_data,
228+
epoch_check_data,
225229
address_map_data,
226230
stack_map_data,
227231
exception_data,

tests/all/epoch_mmu.rs

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#![cfg(not(miri))]
22

3-
use object::{Object, ObjectSection};
3+
use object::{LittleEndian, Object, ObjectSection, U32Bytes};
44
use wasmtime::{Config, Engine};
55
use wasmtime_environ::obj::ELF_WASMTIME_EPOCH_CHECKS;
66

@@ -35,17 +35,28 @@ fn epoch_check_offsets() {
3535
"{ELF_WASMTIME_EPOCH_CHECKS} section should be present"
3636
));
3737
let data = section.data().unwrap();
38-
let offsets: Vec<u32> = data
39-
.chunks_exact(4)
40-
.map(|c| u32::from_le_bytes(c.try_into().unwrap()))
41-
.collect();
38+
39+
let (count_raw, rest) = object::from_bytes::<U32Bytes<LittleEndian>>(data).expect(
40+
".wasmtime.epochchecks section should be long enough to contain a count of epoch checks",
41+
);
42+
let count = count_raw.get(LittleEndian) as usize;
43+
let (starts_raw, rest) = object::slice_from_bytes::<U32Bytes<LittleEndian>>(rest, count)
44+
.expect(".wasmtime.epochchecks section should be long enough to contain a location for each epoch check");
45+
let starts: Vec<u32> = starts_raw.iter().map(|b| b.get(LittleEndian)).collect();
46+
let (length_bits, _rest) = object::slice_from_bytes::<u8>(rest, count.div_ceil(8))
47+
.expect(".wasmtime.epochchecks section should be long enough to contain a length bit for each epoch check");
4248

4349
// The emitted machine code is nailed down by the
4450
// epoch-interruption-mmu-compile-loop.wat disas test. As long as that keeps
45-
// passing, these offsets remain valid.
51+
// passing, these values remain valid.
52+
assert_eq!(
53+
starts,
54+
vec![12, 15],
55+
"There should be 2 epoch checks (function prologue & loop backedge). The offset of the prologue's dead load should be 12, and that of the loop's backedge should be 15."
56+
);
4657
assert_eq!(
47-
offsets,
48-
vec![15, 18],
49-
"There should be 2 epoch checks (function prologue & loop backedge). The offset after the prologue's dead load should be 15, and the one after the loop's backedge should be 18."
58+
length_bits,
59+
vec![0],
60+
"Neither check's load instruction uses R12 of RSP as its source, so all length bits should be 0."
5061
);
5162
}

0 commit comments

Comments
 (0)