Skip to content

Commit b4f8388

Browse files
danbugs and andreiltd
authored and committed
feat: multi-space page-table walk + link primitives
Adds SpaceAwareMapping, SpaceReferenceMapping, SpaceId, walk_va_spaces, and space_aware_map in hyperlight_common::vmem. These primitives let a caller walk several PT roots together, detect aliased intermediate tables (the 'kernel-half shared' pattern where multiple process PDs point at the same PT pages), and rebuild those aliases on the write side by linking into already-built tables instead of copying. i686 implements the real walker (depth-1 PT sharing, which is what Nanvix needs) plus space_aware_map that composes a PDE from the owning space's rebuilt table. amd64 gets a non-aliasing single-root walker so the architecture-independent re-export compiles; no current amd64 embedder exercises aliasing. aarch64 remains a TODO with stubs. Signed-off-by: danbugs <danilochiarlone@gmail.com> Signed-off-by: Tomasz Andrzejak <andreiltd@gmail.com>
1 parent 83d9785 commit b4f8388

4 files changed

Lines changed: 396 additions & 3 deletions

File tree

src/hyperlight_common/src/arch/aarch64/vmem.rs

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,29 @@ pub unsafe fn virt_to_phys<'a, Op: TableReadOps + 'a>(
4646
core::iter::empty()
4747
}
4848

49+
/// Stub — see [`crate::vmem::walk_va_spaces`].
50+
#[allow(clippy::missing_safety_doc)]
51+
pub unsafe fn walk_va_spaces<Op: TableReadOps>(
52+
_op: &Op,
53+
_roots: &[Op::TableAddr],
54+
_address: u64,
55+
_len: u64,
56+
) -> ::alloc::vec::Vec<(
57+
crate::vmem::SpaceId,
58+
::alloc::vec::Vec<crate::vmem::SpaceAwareMapping>,
59+
)> {
60+
::alloc::vec::Vec::new()
61+
}
62+
63+
/// Stub — see [`crate::vmem::space_aware_map`].
64+
#[allow(clippy::missing_safety_doc)]
65+
pub unsafe fn space_aware_map<Op: TableOps>(
66+
_op: &Op,
67+
_ref_map: crate::vmem::SpaceReferenceMapping,
68+
_built_roots: &::alloc::collections::BTreeMap<crate::vmem::SpaceId, Op::TableAddr>,
69+
) {
70+
}
71+
4972
/// Marker trait: it declares no methods or associated items. It is generic
/// over a table-operations type `Op` (which may be unsized) and a
/// `TableMoveInfo` type.
// NOTE(review): presumably ties an `Op::TableMovability` choice to the kind of
// move information it carries — confirm against the impls that follow.
pub trait TableMovability<Op: TableReadOps + ?Sized, TableMoveInfo> {}
5073
impl<Op: TableOps<TableMovability = crate::vmem::MayMoveTable>> TableMovability<Op, Op::TableAddr>
5174
for crate::vmem::MayMoveTable

src/hyperlight_common/src/arch/amd64/vmem.rs

Lines changed: 93 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -265,6 +265,99 @@ unsafe fn map_page<
265265
/// 2. PDPT (38:30) - allocate PD if needed
266266
/// 3. PD (29:21) - allocate PT if needed
267267
/// 4. PT (20:12) - write final PTE with physical address and flags
268+
/// Multi-space page-table walking on amd64: walks each root
269+
/// independently and emits all leaves as `ThisSpace`. Aliased
270+
/// intermediate-table detection is not implemented — no current
271+
/// embedder exercises that pattern on amd64 (it is an i686/Nanvix-
272+
/// specific concern).
273+
#[allow(clippy::missing_safety_doc)]
274+
pub unsafe fn walk_va_spaces<Op: TableReadOps>(
275+
op: &Op,
276+
roots: &[Op::TableAddr],
277+
address: u64,
278+
len: u64,
279+
) -> ::alloc::vec::Vec<(
280+
crate::vmem::SpaceId,
281+
::alloc::vec::Vec<crate::vmem::SpaceAwareMapping>,
282+
)> {
283+
use ::alloc::vec::Vec;
284+
285+
let mut out: Vec<(
286+
crate::vmem::SpaceId,
287+
Vec<crate::vmem::SpaceAwareMapping>,
288+
)> = Vec::with_capacity(roots.len());
289+
290+
let addr = address & ((1u64 << VA_BITS) - 1);
291+
let vmin = addr & !(PAGE_SIZE as u64 - 1);
292+
let vmax = core::cmp::min(addr + len, 1u64 << VA_BITS);
293+
294+
for &root in roots {
295+
#[allow(clippy::unnecessary_cast)]
296+
let root_id: crate::vmem::SpaceId = Op::to_phys(root) as u64;
297+
let mut mappings: Vec<crate::vmem::SpaceAwareMapping> = Vec::new();
298+
299+
let iter = modify_ptes::<47, 39, Op, _>(MapRequest {
300+
table_base: root,
301+
vmin,
302+
len: vmax.saturating_sub(vmin),
303+
update_parent: UpdateParentNone {},
304+
})
305+
.filter_map(|r| unsafe { require_pte_exist(op, r) })
306+
.flat_map(modify_ptes::<38, 30, Op, _>)
307+
.filter_map(|r| unsafe { require_pte_exist(op, r) })
308+
.flat_map(modify_ptes::<29, 21, Op, _>)
309+
.filter_map(|r| unsafe { require_pte_exist(op, r) })
310+
.flat_map(modify_ptes::<20, 12, Op, _>);
311+
312+
for r in iter {
313+
let Some(pte) = (unsafe { read_pte_if_present(op, r.entry_ptr) }) else {
314+
continue;
315+
};
316+
let phys_addr = pte & PTE_ADDR_MASK;
317+
let sgn_bit = r.vmin >> (VA_BITS - 1);
318+
let sgn_bits = 0u64.wrapping_sub(sgn_bit) << VA_BITS;
319+
let virt_addr = sgn_bits | r.vmin;
320+
321+
let executable = (pte & PAGE_NX) == 0;
322+
let avl = pte & PTE_AVL_MASK;
323+
let kind = if avl == PAGE_AVL_COW {
324+
MappingKind::Cow(CowMapping {
325+
readable: true,
326+
executable,
327+
})
328+
} else {
329+
MappingKind::Basic(BasicMapping {
330+
readable: true,
331+
writable: (pte & PAGE_RW) != 0,
332+
executable,
333+
})
334+
};
335+
mappings.push(crate::vmem::SpaceAwareMapping::ThisSpace(Mapping {
336+
phys_base: phys_addr,
337+
virt_base: virt_addr,
338+
len: PAGE_SIZE as u64,
339+
kind,
340+
user_accessible: false,
341+
}));
342+
}
343+
344+
out.push((root_id, mappings));
345+
}
346+
347+
out
348+
}
349+
350+
/// See [`walk_va_spaces`]: amd64 never emits `AnotherSpace`, so this
351+
/// is unreachable in practice. It silently no-ops (rather than
352+
/// panicking) to keep the architecture-independent re-export usable.
353+
#[allow(clippy::missing_safety_doc)]
354+
pub unsafe fn space_aware_map<Op: TableOps>(
355+
_op: &Op,
356+
_ref_map: crate::vmem::SpaceReferenceMapping,
357+
_built_roots: &::alloc::collections::BTreeMap<crate::vmem::SpaceId, Op::TableAddr>,
358+
) {
359+
}
360+
268361
#[allow(clippy::missing_safety_doc)]
269362
pub unsafe fn map<Op: TableOps>(op: &Op, mapping: Mapping) {
270363
modify_ptes::<47, 39, Op, _>(MapRequest {

src/hyperlight_common/src/arch/i686/vmem.rs

Lines changed: 184 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -22,9 +22,9 @@ limitations under the License.
2222
//! Entries are 4 bytes wide. There is no NX bit; all pages are executable.
2323
2424
use crate::vmem::{
25-
BasicMapping, CowMapping, MapRequest, MapResponse, Mapping, MappingKind, TableMovabilityBase,
26-
TableOps, TableReadOps, UpdateParent, UpdateParentNone, modify_ptes, read_pte_if_present,
27-
require_pte_exist, write_entry_updating,
25+
BasicMapping, CowMapping, MapRequest, MapResponse, Mapping, MappingKind, SpaceAwareMapping,
26+
SpaceId, SpaceReferenceMapping, TableMovabilityBase, TableOps, TableReadOps, UpdateParent,
27+
UpdateParentNone, modify_ptes, read_pte_if_present, require_pte_exist, write_entry_updating,
2828
};
2929

3030
/// Size of one page in bytes (4 KiB).
pub const PAGE_SIZE: usize = 4096;
@@ -169,6 +169,187 @@ pub unsafe fn map<Op: TableOps>(op: &Op, mapping: Mapping) {
169169
.for_each(drop);
170170
}
171171

172+
//==================================================================================================
173+
// Multi-space walk / link (shared intermediate tables)
174+
//==================================================================================================
175+
176+
/// i686 has two levels (PD -> PT). The only sharable thing is a PT,
177+
/// at depth 1 (one level below the root PD).
178+
const SHARED_TABLE_DEPTH: usize = 1;
179+
180+
/// Walk multiple root PDs together, detecting PDEs that point at the
181+
/// same PT PA across roots (i.e. aliased PTs — the standard
182+
/// "kernel-half shared" trick on x86 without KPTI). The first root to
183+
/// visit a given PT PA becomes the "owner"; later roots that alias it
184+
/// receive `AnotherSpace(SpaceReferenceMapping { depth: 1, .. })`
185+
/// entries.
186+
///
187+
/// Generic over `TableAddr` so it works with both the in-guest
188+
/// implementation (`TableAddr = u32`, backed by raw pointers) and the
189+
/// host-side snapshot buffer (`TableAddr = u64`, byte offsets).
190+
///
191+
/// # Safety
192+
/// Same invariants as [`virt_to_phys`]. Callers must not mutate the
193+
/// page tables concurrently.
194+
#[allow(clippy::missing_safety_doc)]
195+
pub unsafe fn walk_va_spaces<Op: TableReadOps>(
196+
op: &Op,
197+
roots: &[Op::TableAddr],
198+
address: u64,
199+
len: u64,
200+
) -> ::alloc::vec::Vec<(SpaceId, ::alloc::vec::Vec<SpaceAwareMapping>)> {
201+
use ::alloc::vec::Vec;
202+
203+
// Map: PT PA -> (owner SpaceId, the VA at which the owner used
204+
// this PT). Subsequent visits to the same PT PA emit AnotherSpace.
205+
let mut seen_pts: ::alloc::collections::BTreeMap<u64, (SpaceId, u64)> =
206+
::alloc::collections::BTreeMap::new();
207+
let mut results: Vec<(SpaceId, Vec<SpaceAwareMapping>)> = Vec::with_capacity(roots.len());
208+
209+
let vmin = address & !(PAGE_SIZE as u64 - 1);
210+
let vmax = core::cmp::min(address + len, 1u64 << VA_BITS);
211+
212+
for &root in roots {
213+
#[allow(clippy::unnecessary_cast)]
214+
let root_id: SpaceId = Op::to_phys(root) as u64;
215+
let mut mappings: Vec<SpaceAwareMapping> = Vec::new();
216+
217+
// Iterate PDEs covering [vmin, vmax) at the PD level (bits 31:22).
218+
let pde_iter = modify_ptes::<31, 22, Op, _>(MapRequest {
219+
table_base: root,
220+
vmin,
221+
len: vmax.saturating_sub(vmin),
222+
update_parent: UpdateParentNone {},
223+
});
224+
for r in pde_iter {
225+
let Some(pde) = (unsafe { read_pte_if_present(op, r.entry_ptr) }) else {
226+
continue;
227+
};
228+
let pt_pa: u64 = pde & PTE_ADDR_MASK;
229+
230+
// Seen this PT via an earlier root? Emit AnotherSpace and
231+
// don't descend — the sub-tree is fully described by the
232+
// owner's entries.
233+
if let Some(&(owner, their_va)) = seen_pts.get(&pt_pa) {
234+
if owner != root_id {
235+
mappings.push(SpaceAwareMapping::AnotherSpace(SpaceReferenceMapping {
236+
depth: SHARED_TABLE_DEPTH,
237+
space: owner,
238+
our_va: r.vmin,
239+
their_va,
240+
}));
241+
continue;
242+
}
243+
// Same space saw this PT before (shouldn't happen with
244+
// the virt_to_phys-style per-PDE iteration, but skip
245+
// defensively).
246+
continue;
247+
}
248+
seen_pts.insert(pt_pa, (root_id, r.vmin));
249+
250+
// Descend the PT and emit ThisSpace entries for each live
251+
// 4KB leaf, mirroring virt_to_phys's leaf-emission logic.
252+
let pt_request = MapRequest {
253+
#[allow(clippy::unnecessary_cast)]
254+
table_base: Op::from_phys(pt_pa as PhysAddr),
255+
vmin: r.vmin,
256+
len: r.len,
257+
update_parent: UpdateParentNone {},
258+
};
259+
for leaf in modify_ptes::<21, 12, Op, _>(pt_request) {
260+
let Some(pte) = (unsafe { read_pte_if_present(op, leaf.entry_ptr) }) else {
261+
continue;
262+
};
263+
let phys_addr = pte & PTE_ADDR_MASK;
264+
let avl = pte & PTE_AVL_MASK;
265+
let kind = if avl == PAGE_AVL_COW {
266+
MappingKind::Cow(CowMapping {
267+
readable: true,
268+
executable: true,
269+
})
270+
} else {
271+
MappingKind::Basic(BasicMapping {
272+
readable: true,
273+
writable: (pte & PAGE_RW) != 0,
274+
executable: true,
275+
})
276+
};
277+
mappings.push(SpaceAwareMapping::ThisSpace(Mapping {
278+
phys_base: phys_addr,
279+
virt_base: leaf.vmin,
280+
len: PAGE_SIZE as u64,
281+
kind,
282+
user_accessible: (pte & PAGE_USER) != 0,
283+
}));
284+
}
285+
}
286+
287+
results.push((root_id, mappings));
288+
}
289+
290+
results
291+
}
292+
293+
/// Install the link described by `ref_map` in `op`'s root PT tree:
294+
/// look up what the owner space's rebuilt root put at `their_va`'s
295+
/// PDE slot, and write that PA into our root's PDE slot for
296+
/// `our_va`. The owner's rebuilt root is found via `built_roots`.
297+
///
298+
/// On i686 `ref_map.depth` must be 1 (PT-level sharing). Other depths
299+
/// are rejected defensively.
300+
///
301+
/// # Safety
302+
/// Same invariants as [`map`]: caller owns the concurrency story and
303+
/// must invalidate the TLB if the page tables are live.
304+
#[allow(clippy::missing_safety_doc)]
305+
pub unsafe fn space_aware_map<Op: TableOps>(
306+
op: &Op,
307+
ref_map: SpaceReferenceMapping,
308+
built_roots: &::alloc::collections::BTreeMap<SpaceId, Op::TableAddr>,
309+
) {
310+
assert!(
311+
ref_map.depth == SHARED_TABLE_DEPTH,
312+
"i686 only supports depth={} sharing; got depth={}",
313+
SHARED_TABLE_DEPTH,
314+
ref_map.depth
315+
);
316+
317+
// Their rebuilt root — must have been populated earlier in the
318+
// rebuild loop (walk_va_spaces guarantees topological order).
319+
let Some(&their_root) = built_roots.get(&ref_map.space) else {
320+
// Defensive: we have no linkage target. Skip rather than
321+
// panic. A trace print would live here in a debug build.
322+
return;
323+
};
324+
325+
// Read their PDE at their_va's index to get the rebuilt PT PA.
326+
let their_pdi = (ref_map.their_va >> 22) & 0x3FF;
327+
let their_pde_ptr = Op::entry_addr(
328+
their_root,
329+
their_pdi * core::mem::size_of::<PageTableEntry>() as u64,
330+
);
331+
let Some(their_pde) = (unsafe { read_pte_if_present(op, their_pde_ptr) }) else {
332+
// Owner didn't end up with a PDE here — nothing to link.
333+
return;
334+
};
335+
let their_pt_pa: u64 = their_pde & PTE_ADDR_MASK;
336+
337+
// Compose our PDE: point at their PT, preserve their PDE's low
338+
// bits (PAGE_USER for kernel-accessible, PAGE_RW, etc.) so the
339+
// hardware still honours sharing semantics uniformly.
340+
let our_pdi = (ref_map.our_va >> 22) & 0x3FF;
341+
let our_root = op.root_table();
342+
let our_pde_ptr = Op::entry_addr(
343+
our_root,
344+
our_pdi * core::mem::size_of::<PageTableEntry>() as u64,
345+
);
346+
347+
let new_pde: u64 = their_pt_pa | (their_pde & !PTE_ADDR_MASK) | PAGE_PRESENT;
348+
unsafe {
349+
write_entry_updating(op, Op::TableMovability::root_update_parent(), our_pde_ptr, new_pde);
350+
}
351+
}
352+
172353
/// Translate a virtual address range to its backing physical pages.
173354
///
174355
/// # Safety

0 commit comments

Comments
 (0)