@@ -228,6 +228,19 @@ pub struct SsaBuilder {
228228 /// becomes a single Cranelift `fsqrt` (or libm-backed `sin`/`cos`)
229229 /// instruction.
230230 intrinsic_alias_map : IndexMap < InternedString , crate :: hir:: Intrinsic > ,
231+ /// When set, the next `@reference`-class struct literal that
232+ /// gets lowered uses this pointer as its allocation site INSTEAD
233+ /// of emitting a fresh `Malloc`. Used by the array-of-`@reference`
234+ /// literal lowering to pool-allocate the N bodies into one
235+ /// contiguous buffer: spatial locality on cache-bound architectures
236+ /// (x86 GHA runners) gets you the value-type access pattern while
237+ /// preserving the pointer-array semantics user code expects.
238+ ///
239+ /// Always `None` outside the array-of-`@reference` literal path.
240+ /// The array lowering sets a slot pointer before translating each
241+ /// element expression and resets it to `None` immediately after, so
242+ /// struct literals lowered later are never placed by accident.
243+ array_pool_placement : Option < HirId > ,
231244}
232245
233246/// Context for pattern matching
@@ -502,6 +515,7 @@ impl SsaBuilder {
502515 resume_param_names : HashSet :: new ( ) ,
503516 preset_param_typed_ast_types : IndexMap :: new ( ) ,
504517 intrinsic_alias_map : default_intrinsic_alias_map ( ) ,
518+ array_pool_placement : None ,
505519 }
506520 }
507521
@@ -544,6 +558,7 @@ impl SsaBuilder {
544558 resume_param_names : HashSet :: new ( ) ,
545559 preset_param_typed_ast_types : IndexMap :: new ( ) ,
546560 intrinsic_alias_map : default_intrinsic_alias_map ( ) ,
561+ array_pool_placement : None ,
547562 function,
548563 } ;
549564 // Pre-register all existing blocks in the definitions map
@@ -4852,33 +4867,45 @@ impl SsaBuilder {
48524867 }
48534868 let total_size = running. max ( 1 ) ;
48544869
4855- // Size constant for malloc.
4856- let size_const = self . create_value (
4857- HirType :: I64 ,
4858- HirValueKind :: Constant ( crate :: hir:: HirConstant :: I64 ( total_size as i64 ) ) ,
4859- ) ;
4860-
4861- // Emit `Call(Intrinsic::Malloc, [size])`. The
4862- // backend lowers Malloc to a libc call returning a
4863- // pointer; the SSA value is typed
4864- // `Ptr(Struct{..})` to match the struct's HIR
4865- // type.
4866- let malloc_result =
4867- self . create_value ( struct_ty. clone ( ) , HirValueKind :: Instruction ) ;
4868- self . add_instruction (
4869- block_id,
4870- HirInstruction :: Call {
4871- result : Some ( malloc_result) ,
4872- callee : crate :: hir:: HirCallable :: Intrinsic (
4873- crate :: hir:: Intrinsic :: Malloc ,
4874- ) ,
4875- args : vec ! [ size_const] ,
4876- type_args : vec ! [ ] ,
4877- const_args : vec ! [ ] ,
4878- is_tail : false ,
4879- } ,
4880- ) ;
4881- self . add_use ( size_const, malloc_result) ;
4870+ // Allocation site. Two modes:
4871+ // * Normal: emit `Call(Intrinsic::Malloc, [size])`
4872+ // and field-store through the returned pointer.
4873+ // * Pooled (`array_pool_placement = Some(ptr)`):
4874+ // skip the malloc entirely — the array literal
4875+ // lowering pre-allocated one big buffer for all
4876+ // N bodies and handed us a pointer to slot i.
4877+ // Field stores go directly to that slot, so the
4878+ // N bodies end up contiguous in memory. Spatial
4879+ // locality matches a value-type Array<Body> on
4880+ // cache-bound architectures (x86 GHA runners)
4881+ // while preserving the pointer-array surface
4882+ // the rest of the compiler expects.
4883+ let malloc_result = if let Some ( slot_ptr) = self . array_pool_placement {
4884+ slot_ptr
4885+ } else {
4886+ let size_const = self . create_value (
4887+ HirType :: I64 ,
4888+ HirValueKind :: Constant ( crate :: hir:: HirConstant :: I64 (
4889+ total_size as i64 ,
4890+ ) ) ,
4891+ ) ;
4892+ let r = self . create_value ( struct_ty. clone ( ) , HirValueKind :: Instruction ) ;
4893+ self . add_instruction (
4894+ block_id,
4895+ HirInstruction :: Call {
4896+ result : Some ( r) ,
4897+ callee : crate :: hir:: HirCallable :: Intrinsic (
4898+ crate :: hir:: Intrinsic :: Malloc ,
4899+ ) ,
4900+ args : vec ! [ size_const] ,
4901+ type_args : vec ! [ ] ,
4902+ const_args : vec ! [ ] ,
4903+ is_tail : false ,
4904+ } ,
4905+ ) ;
4906+ self . add_use ( size_const, r) ;
4907+ r
4908+ } ;
48824909
48834910 // For each field: emit GEP (byte offset) + Store.
48844911 // GEPs use HirType::U8 + a single i64 byte-offset
@@ -5039,9 +5066,84 @@ impl SsaBuilder {
50395066 } ,
50405067 ) ;
50415068
5069+ // Pool-allocation eligibility: `elem_ty` is `Ptr(Struct(..))`
5070+ // (every element is an `@reference` class) AND every
5071+ // `elem_expr` is a direct `TypedExpression::Struct`
5072+ // literal. When eligible, we allocate ONE buffer of
5073+ // `N * sizeof(Struct)` and hand each struct literal a
5074+ // pre-computed slot pointer instead of letting it emit
5075+ // its own `Malloc`. Bodies land contiguously in memory;
5076+ // the pointer-array (`data_ptr` slots) still holds
5077+ // distinct per-body pointers so identity semantics are
5078+ // preserved.
5079+ let pool_eligible = matches ! ( elem_ty, HirType :: Ptr ( ref inner) if matches!( * * inner, HirType :: Struct ( _) ) )
5080+ && elements
5081+ . iter ( )
5082+ . all ( |e| matches ! ( e. node, TypedExpression :: Struct ( _) ) ) ;
5083+ let ( pool_buf_ptr, pool_slot_size) = if pool_eligible {
5084+ if let HirType :: Ptr ( ref inner) = elem_ty {
5085+ let slot_size = hir_ty_size ( inner) ;
5086+ let pool_total = elements. len ( ) * slot_size;
5087+ let size_const = self . create_value (
5088+ HirType :: I64 ,
5089+ HirValueKind :: Constant ( crate :: hir:: HirConstant :: I64 ( pool_total as i64 ) ) ,
5090+ ) ;
5091+ let pool = self . create_value ( elem_ty. clone ( ) , HirValueKind :: Instruction ) ;
5092+ self . add_instruction (
5093+ block_id,
5094+ HirInstruction :: Call {
5095+ result : Some ( pool) ,
5096+ callee : crate :: hir:: HirCallable :: Intrinsic (
5097+ crate :: hir:: Intrinsic :: Malloc ,
5098+ ) ,
5099+ args : vec ! [ size_const] ,
5100+ type_args : vec ! [ ] ,
5101+ const_args : vec ! [ ] ,
5102+ is_tail : false ,
5103+ } ,
5104+ ) ;
5105+ self . add_use ( size_const, pool) ;
5106+ ( Some ( pool) , slot_size)
5107+ } else {
5108+ ( None , 0 )
5109+ }
5110+ } else {
5111+ ( None , 0 )
5112+ } ;
5113+
50425114 // Step 2: Store each element into the data buffer
50435115 for ( i, elem_expr) in elements. iter ( ) . enumerate ( ) {
5116+ // For pooled `@reference` arrays: compute slot_i =
5117+ // pool_buf + i*slot_size and hand it to the struct
5118+ // literal lowering via `array_pool_placement`. The
5119+ // struct literal will use this pointer instead of
5120+ // mallocing — fields land at pool_buf+i*slot_size+
5121+ // field_offset, exactly contiguous.
5122+ if let Some ( pool) = pool_buf_ptr {
5123+ let offset = i * pool_slot_size;
5124+ let offset_const = self . create_value (
5125+ HirType :: I64 ,
5126+ HirValueKind :: Constant ( crate :: hir:: HirConstant :: I64 ( offset as i64 ) ) ,
5127+ ) ;
5128+ let slot_ptr =
5129+ self . create_value ( elem_ty. clone ( ) , HirValueKind :: Instruction ) ;
5130+ self . add_instruction (
5131+ block_id,
5132+ HirInstruction :: GetElementPtr {
5133+ result : slot_ptr,
5134+ ty : HirType :: U8 ,
5135+ ptr : pool,
5136+ indices : vec ! [ offset_const] ,
5137+ } ,
5138+ ) ;
5139+ self . add_use ( pool, slot_ptr) ;
5140+ self . add_use ( offset_const, slot_ptr) ;
5141+ self . array_pool_placement = Some ( slot_ptr) ;
5142+ }
50445143 let elem_val = self . translate_expression ( block_id, elem_expr) ?;
5144+ // Clear immediately so it doesn't leak into nested
5145+ // struct literals inside subsequent elements.
5146+ self . array_pool_placement = None ;
50455147
50465148 let offset = i * elem_size;
50475149 let offset_const = self . create_value (
0 commit comments