@@ -14,13 +14,14 @@ See the License for the specific language governing permissions and
1414limitations under the License.
1515*/
1616
17- use std:: collections:: HashMap ;
17+ use std:: collections:: { BTreeMap , HashMap } ;
1818use std:: sync:: atomic:: { AtomicU64 , Ordering } ;
1919
2020use hyperlight_common:: layout:: { scratch_base_gpa, scratch_base_gva} ;
2121use hyperlight_common:: vmem;
2222use hyperlight_common:: vmem:: {
23- BasicMapping , CowMapping , Mapping , MappingKind , PAGE_SIZE , TableOps ,
23+ BasicMapping , CowMapping , Mapping , MappingKind , PAGE_SIZE , SpaceAwareMapping , SpaceId ,
24+ TableOps ,
2425} ;
2526use tracing:: { Span , instrument} ;
2627
@@ -242,43 +243,23 @@ impl<'a> core::convert::AsRef<SharedMemoryPageTableBuffer<'a>> for SharedMemoryP
242243 self
243244 }
244245}
245- fn filtered_mappings < ' a > (
246- snap : & ' a [ u8 ] ,
247- scratch : & ' a [ u8 ] ,
248- regions : & [ MemoryRegion ] ,
249- layout : SandboxMemoryLayout ,
250- root_pts : & [ u64 ] ,
251- ) -> Vec < ( usize , Mapping , & ' a [ u8 ] ) > {
252- let mut result = Vec :: new ( ) ;
253- let scratch_gva = scratch_base_gva ( layout. get_scratch_size ( ) ) ;
254-
255- for ( root_idx, & root_pt) in root_pts. iter ( ) . enumerate ( ) {
256- let op = SharedMemoryPageTableBuffer :: new ( snap, scratch, layout, root_pt) ;
257-
258- let iter = unsafe { vmem:: virt_to_phys ( & op, 0 , hyperlight_common:: layout:: MAX_GVA as u64 ) } ;
259-
260- for m in iter {
261- // the scratch map doesn't count
262- if m. virt_base >= scratch_gva {
263- continue ;
264- }
265- // neither does the mapping of the snapshot's own page tables
266- #[ cfg( not( feature = "i686-guest" ) ) ]
267- if m. virt_base >= hyperlight_common:: layout:: SNAPSHOT_PT_GVA_MIN as u64
268- && m. virt_base <= hyperlight_common:: layout:: SNAPSHOT_PT_GVA_MAX as u64
269- {
270- continue ;
271- }
272-
273- if let Some ( contents) =
274- unsafe { guest_page ( snap, scratch, regions, layout, m. phys_base ) }
275- {
276- result. push ( ( root_idx, m, contents) ) ;
277- }
278- }
279- }
280-
281- result
246+ /// Return true if `virt_base` is a VA we must not preserve into the
247+ /// rebuilt snapshot page tables: it is either part of the scratch
248+ /// region (re-mapped freshly by `map_specials`) or, on amd64, part of
249+ /// the self-map of the snapshot's own page tables.
250+ fn skip_virt ( virt_base : u64 , scratch_gva : u64 ) -> bool {
251+ if virt_base >= scratch_gva {
252+ return true ;
253+ }
254+ #[ cfg( not( feature = "i686-guest" ) ) ]
255+ if virt_base >= hyperlight_common:: layout:: SNAPSHOT_PT_GVA_MIN as u64
256+ && virt_base <= hyperlight_common:: layout:: SNAPSHOT_PT_GVA_MAX as u64
257+ {
258+ return true ;
259+ }
260+ #[ cfg( feature = "i686-guest" ) ]
261+ let _ = virt_base;
262+ false
282263}
283264
284265/// Find the contents of the page which starts at gpa in guest physical
@@ -456,55 +437,115 @@ impl Snapshot {
456437 entrypoint : NextAction ,
457438 ) -> Result < Self > {
458439 let mut phys_seen = HashMap :: < u64 , usize > :: new ( ) ;
440+ let scratch_gva = scratch_base_gva ( layout. get_scratch_size ( ) ) ;
459441 let memory = shared_mem. with_contents ( |snap_c| {
460442 scratch_mem. with_contents ( |scratch_c| {
461- // Phase 1: walk every PT root and collect live pages,
462- // tagged with which root they belong to (for per-process
463- // PD isolation on i686).
464- let live_pages =
465- filtered_mappings ( snap_c, scratch_c, & regions, layout, root_pt_gpas) ;
466-
467- // Phase 2: compact live pages into a dense snapshot blob
468- // and build new page tables with compacted GPAs.
443+ // Phase 1: walk every PT root together. This detects
444+ // aliased intermediate tables (e.g. Nanvix's kernel-
445+ // half PTs, which multiple process PDs share by
446+ // pointing at the same PT page). The walker emits
447+ // `ThisSpace(leaf)` for private leaves and
448+ // `AnotherSpace(ref)` for sub-trees that were already
449+ // seen via an earlier root. Results are returned in
450+ // `root_pt_gpas` order — which is also the topological
451+ // order of the `AnotherSpace` references — so
452+ // processing in iteration order is safe.
453+ let op = SharedMemoryPageTableBuffer :: new (
454+ snap_c,
455+ scratch_c,
456+ layout,
457+ root_pt_gpas. first ( ) . copied ( ) . unwrap_or ( 0 ) ,
458+ ) ;
459+ let walk = unsafe {
460+ vmem:: walk_va_spaces (
461+ & op,
462+ root_pt_gpas,
463+ 0 ,
464+ hyperlight_common:: layout:: MAX_GVA as u64 ,
465+ )
466+ } ;
467+
468+ // Phase 2: rebuild each space's page tables, compacting
469+ // `ThisSpace` leaves into a dense snapshot blob and
470+ // linking `AnotherSpace` entries to already-built
471+ // spaces' tables.
469472 // TODO: Look for opportunities to hugepage map
470473 let mut snapshot_memory: Vec < u8 > = Vec :: new ( ) ;
471474 let pt_buf = GuestPageTableBuffer :: new ( layout. get_pt_base_gpa ( ) as usize ) ;
472475 for _ in 1 ..root_pt_gpas. len ( ) {
473476 unsafe { pt_buf. alloc_table ( ) } ;
474477 }
475478
476- for ( _root_idx, mapping, contents) in live_pages {
477- // Convert a snapshot mapping kind for compaction: writable
478- // pages become CoW, read-only pages stay read-only.
479- let kind = match mapping. kind {
480- MappingKind :: Cow ( cm) => MappingKind :: Cow ( cm) ,
481- MappingKind :: Basic ( bm) if bm. writable => MappingKind :: Cow ( CowMapping {
482- readable : bm. readable ,
483- executable : bm. executable ,
484- } ) ,
485- MappingKind :: Basic ( bm) => MappingKind :: Basic ( BasicMapping {
486- readable : bm. readable ,
487- writable : false ,
488- executable : bm. executable ,
489- } ) ,
490- MappingKind :: Unmapped => continue ,
491- } ;
492- let new_gpa = phys_seen. entry ( mapping. phys_base ) . or_insert_with ( || {
493- let new_offset = snapshot_memory. len ( ) ;
494- snapshot_memory. extend ( contents) ;
495- new_offset + SandboxMemoryLayout :: BASE_ADDRESS
496- } ) ;
497-
498- pt_buf. set_root_offset ( _root_idx * PAGE_SIZE ) ;
499-
500- let mapping = Mapping {
501- phys_base : * new_gpa as u64 ,
502- virt_base : mapping. virt_base ,
503- len : PAGE_SIZE as u64 ,
504- kind,
505- user_accessible : mapping. user_accessible ,
506- } ;
507- unsafe { vmem:: map ( & pt_buf, mapping) } ;
479+ let mut built_roots: BTreeMap < SpaceId , u64 > = BTreeMap :: new ( ) ;
480+ for ( root_idx, ( space_id, mappings) ) in walk. into_iter ( ) . enumerate ( ) {
481+ pt_buf. set_root_offset ( root_idx * PAGE_SIZE ) ;
482+ built_roots. insert (
483+ space_id,
484+ ( layout. get_pt_base_gpa ( ) as usize + root_idx * PAGE_SIZE ) as u64 ,
485+ ) ;
486+
487+ for sam in mappings {
488+ match sam {
489+ SpaceAwareMapping :: ThisSpace ( mapping) => {
490+ // Drop the scratch region and (on
491+ // amd64) the snapshot's own PT
492+ // self-map; both are re-mapped
493+ // freshly by `map_specials`.
494+ if skip_virt ( mapping. virt_base , scratch_gva) {
495+ continue ;
496+ }
497+ let Some ( contents) = ( unsafe {
498+ guest_page ( snap_c, scratch_c, & regions, layout, mapping. phys_base )
499+ } ) else {
500+ continue ;
501+ } ;
502+
503+ // Writable pages become CoW in the
504+ // rebuilt snapshot; read-only pages
505+ // stay read-only.
506+ let kind = match mapping. kind {
507+ MappingKind :: Cow ( cm) => MappingKind :: Cow ( cm) ,
508+ MappingKind :: Basic ( bm) if bm. writable => {
509+ MappingKind :: Cow ( CowMapping {
510+ readable : bm. readable ,
511+ executable : bm. executable ,
512+ } )
513+ }
514+ MappingKind :: Basic ( bm) => MappingKind :: Basic ( BasicMapping {
515+ readable : bm. readable ,
516+ writable : false ,
517+ executable : bm. executable ,
518+ } ) ,
519+ MappingKind :: Unmapped => continue ,
520+ } ;
521+ let new_gpa =
522+ phys_seen. entry ( mapping. phys_base ) . or_insert_with ( || {
523+ let new_offset = snapshot_memory. len ( ) ;
524+ snapshot_memory. extend ( contents) ;
525+ new_offset + SandboxMemoryLayout :: BASE_ADDRESS
526+ } ) ;
527+
528+ let compacted = Mapping {
529+ phys_base : * new_gpa as u64 ,
530+ virt_base : mapping. virt_base ,
531+ len : PAGE_SIZE as u64 ,
532+ kind,
533+ user_accessible : mapping. user_accessible ,
534+ } ;
535+ unsafe { vmem:: map ( & pt_buf, compacted) } ;
536+ }
537+ SpaceAwareMapping :: AnotherSpace ( ref_map) => {
538+ // Link to the owning space's already-
539+ // rebuilt intermediate table — this
540+ // is what preserves Nanvix's
541+ // kernel-half-shared invariant across
542+ // process PDs after relocation.
543+ unsafe {
544+ vmem:: space_aware_map ( & pt_buf, ref_map, & built_roots) ;
545+ }
546+ }
547+ }
548+ }
508549 }
509550
510551 // Phase 3: Map the scratch region into each root.
0 commit comments