@@ -5073,36 +5073,49 @@ impl Function {
50735073 }
50745074 }
50755075
5076- // TODO: Replace hashmaps with vecs?
5077- // TODO: Add comments describing load store elimination in the first pass
5078- // TODO: Link to rails at scale post?
5079- // TODO: Add comments describing deada store elimination in the second pass
5080- // TODO: Add master comment about type based alias analysis with specific callouts referencing the overall TBAA comment
5081- // TODO(Jacob): Spend time with the old noggin to answer the following questions.
5082- // 1. It is possible to do DSE forwards. Is it possible to do this at the same time as load store?
5083- // If so, this would allow us to do it in one pass*. We still need to clean up the dead stores afterwards and convert them to
5084- // NOPs or something. The more I think about it, the more I think it makes sense to have a cleanup pass at the end of all other
5085- // analysis passes.
5086- // 2. DSE and load-store-opt seem like duals of each other. If this is true, then they should be symmetric. Why does DSE only handle
5087- // stores? Can I convince myself that we're not missing some potential dead loads? Is this caught at an SSA level already?
5076+ /// Load store optimization performs two functions:
5077+ /// 1. Elide duplicate LoadField and StoreField instructions, and replace
5078+ /// redundant loads or stores with their values when possible.
5079+ /// 2. Remove "dead stores". A store is dead when it is never used before
5080+ /// getting overwritten by a second store. If this pass proves no use,
5081+ /// the first store is elided.
5082+ ///
5083+ /// These two optimizations use very similar abstract interpretations and are
5084+ /// currently combined into one pass. The primary difference is that load
5085+ /// store optimization runs forward (from top to bottom of a block) while
5086+ /// dead store elimination runs in reverse. A subtle secondary difference is
5087+ /// that our use of extended basic blocks induces an asymmetry between these
5088+ /// two optimizations. When scanning in reverse, dead store elimination needs
5089+ /// to consider the "multi-exit" nature of EBBs. Stores that appear to be
5090+ /// dead can sometimes have early exits that make the optimization unsound
5091+ /// if we don't handle this special case. Basic blocks avoid this problem.
5092+ ///
5093+ /// Note for future improvements:
5094+ /// Load & store optimizations currently operate at a block level, but could
5095+ /// be expanded to operate on methods. This likely means the two passes
5096+ /// should be split into their own methods as instead of "one forwards, one
5097+ /// backwards" we need to traverse dominator and post dominator trees and it
5098+ /// becomes tricky enough that splitting may be cleaner.
5099+ ///
5100+ /// For further reading, see the following Rails at Scale blog post.
5101+ /// https://railsatscale.com/2026-03-18-how-zjit-removes-redundant-object-loads-and-stores/
50885102 fn optimize_load_store ( & mut self ) {
50895103 #[ derive( PartialEq , Eq , Hash , Clone , Copy ) ]
50905104 struct HeapKey {
50915105 object : InsnId ,
50925106 offset : i32 ,
50935107 }
50945108
5095- // This helper function is used while we don't have type based alias
5096- // analysis. Matching offsets could result in aliased objects that need
5097- // to be clear.
5098- // Any code location that uses the remove_aliasing function needs to be replaced
5099- // when we implement TBAA
5100- fn remove_aliasing ( map : & mut HashMap < HeapKey , InsnId > , offset : i32 ) {
5109+ // This helper function is used to clear the cache for matching offsets
5110+ // while we don't have type based alias.
5111+ // TBAA will primarily modify any part of this optimization that
5112+ // currently uses this helper function.
5113+ fn flush_aliasing ( map : & mut HashMap < HeapKey , InsnId > , offset : i32 ) {
51015114 map. retain ( |HeapKey { object : _, offset : off } , _| * off != offset) ;
51025115 }
51035116
5117+ // Redundant load and store optimization
51045118 for block in self . rpo ( ) {
5105- // First, we eliminate loads and stores
51065119 let mut compile_time_heap: HashMap < HeapKey , InsnId > = HashMap :: new ( ) ;
51075120 let old_insns = std:: mem:: take ( & mut self . blocks [ block. 0 ] . insns ) ;
51085121 let mut new_insns = vec ! [ ] ;
@@ -5112,12 +5125,11 @@ impl Function {
51125125 Insn :: StoreField { recv, offset, val, .. } => {
51135126 let key = HeapKey { object : self . chase_insn ( recv) , offset } ;
51145127 let heap_entry = compile_time_heap. get ( & key) . copied ( ) ;
5115- // TODO(Jacob): Switch from actual to partial equality
51165128 if Some ( val) == heap_entry {
51175129 // If the value is already stored, short circuit and don't add an instruction to the block
51185130 continue
51195131 }
5120- remove_aliasing ( & mut compile_time_heap, offset) ;
5132+ flush_aliasing ( & mut compile_time_heap, offset) ;
51215133 compile_time_heap. insert ( key, val) ;
51225134 } ,
51235135 Insn :: LoadField { recv, offset, .. } => {
@@ -5126,7 +5138,6 @@ impl Function {
51265138 std:: collections:: hash_map:: Entry :: Occupied ( entry) => {
51275139 // If the value is stored already, we should short circuit.
51285140 // However, we need to replace insn_id with its representative in the SSA union.
5129- // TODO: Show an example of why we need this with p little ascii diagram or something
51305141 self . make_equal_to ( insn_id, * entry. get ( ) ) ;
51315142 continue
51325143 }
@@ -5142,7 +5153,7 @@ impl Function {
51425153 // But flags does not exist in our effects abstract heap modeling and we don't want to add special casing to effects.
51435154 // This special casing in this pass here should be removed once we refine our effects system to provide greater granularity for WriteBarrier.
51445155 let offset = RUBY_OFFSET_RBASIC_FLAGS ;
5145- remove_aliasing ( & mut compile_time_heap, offset) ;
5156+ flush_aliasing ( & mut compile_time_heap, offset) ;
51465157 } ,
51475158 insn => {
51485159 // If an instruction affects memory and we haven't modeled it, the compile_time_heap is invalidated
@@ -5154,25 +5165,12 @@ impl Function {
51545165 new_insns. push ( insn_id) ;
51555166 }
51565167
5157-
5158- // get the block
5159- // load-store optimization
5160- // reverse updated instructions
5161- // DSE
5162- // reverse final updated instructions
5163- // return
5164- //
5165- // get the block
5166- // load-store optimization, DSE* after (same iteration)
5167- // instead of eliding the store, mark the earlier store for removal
5168- // return
5169-
5170- // After performing scalar replacement on loads and stores where
5171- // applicable, we scan in reverse to find an eliminate dead stores
5168+ // Set up for dead store elimination
51725169 let mut reversed_insns = std:: mem:: take ( & mut new_insns) ;
51735170 reversed_insns. reverse ( ) ;
51745171 compile_time_heap. clear ( ) ;
51755172
5173+ // Dead store elimnination
51765174 for insn_id in reversed_insns {
51775175 match self . find ( insn_id) {
51785176 Insn :: StoreField { recv, offset, .. } => {
@@ -5182,21 +5180,21 @@ impl Function {
51825180 continue
51835181 }
51845182 else {
5185- remove_aliasing ( & mut compile_time_heap, offset) ;
5183+ flush_aliasing ( & mut compile_time_heap, offset) ;
51865184 compile_time_heap. insert ( key, insn_id) ;
51875185 }
51885186 }
51895187 Insn :: LoadField { offset, .. } => {
5190- remove_aliasing ( & mut compile_time_heap, offset) ;
5188+ flush_aliasing ( & mut compile_time_heap, offset) ;
51915189 }
51925190 Insn :: WriteBarrier { .. } => {
51935191 let offset = RUBY_OFFSET_RBASIC_FLAGS ;
5194- remove_aliasing ( & mut compile_time_heap, offset) ;
5192+ flush_aliasing ( & mut compile_time_heap, offset) ;
51955193 } ,
51965194 insn => {
51975195 let insn_reads_memory = insn. effects_of ( ) . includes ( Effect :: read ( abstract_heaps:: Memory ) ) ;
51985196 let insn_uses_control_flow = insn. effects_of ( ) . includes ( effects:: Control ) ;
5199- // TODO: Once we update from extended basic blocks to normal basic blocks, we can remove the control flow check
5197+ // TODO(Jacob) : Once we update from extended basic blocks to normal basic blocks, we can remove the control flow check
52005198 if insn_reads_memory || insn_uses_control_flow {
52015199 compile_time_heap. clear ( ) ;
52025200 }
0 commit comments