@@ -4836,36 +4836,49 @@ impl Function {
48364836 }
48374837 }
48384838
4839- // TODO: Replace hashmaps with vecs?
4840- // TODO: Add comments describing load store elimination in the first pass
4841- // TODO: Link to rails at scale post?
4842- // TODO: Add comments describing deada store elimination in the second pass
4843- // TODO: Add master comment about type based alias analysis with specific callouts referencing the overall TBAA comment
4844- // TODO(Jacob): Spend time with the old noggin to answer the following questions.
4845- // 1. It is possible to do DSE forwards. Is it possible to do this at the same time as load store?
4846- // If so, this would allow us to do it in one pass*. We still need to clean up the dead stores afterwards and convert them to
4847- // NOPs or something. The more I think about it, the more I think it makes sense to have a cleanup pass at the end of all other
4848- // analysis passes.
4849- // 2. DSE and load-store-opt seem like duals of each other. If this is true, then they should be symmetric. Why does DSE only handle
4850- // stores? Can I convince myself that we're not missing some potential dead loads? Is this caught at an SSA level already?
4839+ /// Load store optimization performs two functions:
4840+ /// 1. Elide duplicate LoadField and StoreField instructions, and replace
4841+ /// redundant loads or stores with their values when possible.
4842+ /// 2. Remove "dead stores". A store is dead when it is never used before
4843+ /// getting overwritten by a second store. If this pass proves no use,
4844+ /// the first store is elided.
4845+ ///
4846+ /// These two optimizations use very similar abstract interpretations and are
4847+ /// currently combined into one pass. The primary difference is that load
4848+ /// store optimization runs forward (from top to bottom of a block) while
4849+ /// dead store elimination runs in reverse. A subtle secondary difference is
4850+ /// that our use of extended basic blocks induces an asymmetry between these
4851+ /// two optimizations. When scanning in reverse, dead store elimination needs
4852+ /// to consider the "multi-exit" nature of EBBs. Stores that appear to be
4853+ /// dead can sometimes have early exits that make the optimization unsound
4854+ /// if we don't handle this special case. Basic blocks avoid this problem.
4855+ ///
4856+ /// Note for future improvements:
4857+ /// Load & store optimizations currently operate at a block level, but could
4858+ /// be expanded to operate on methods. This likely means the two passes
4859+ /// should be split into their own methods as instead of "one forwards, one
4860+ /// backwards" we need to traverse dominator and post dominator trees and it
4861+ /// becomes tricky enough that splitting may be cleaner.
4862+ ///
4863+ /// For further reading, see the following Rails at Scale blog post.
4864+ /// https://railsatscale.com/2026-03-18-how-zjit-removes-redundant-object-loads-and-stores/
48514865 fn optimize_load_store ( & mut self ) {
48524866 #[ derive( PartialEq , Eq , Hash , Clone , Copy ) ]
48534867 struct HeapKey {
48544868 object : InsnId ,
48554869 offset : i32 ,
48564870 }
48574871
4858- // This helper function is used while we don't have type based alias
4859- // analysis. Matching offsets could result in aliased objects that need
4860- // to be clear.
4861- // Any code location that uses the remove_aliasing function needs to be replaced
4862- // when we implement TBAA
4863- fn remove_aliasing ( map : & mut HashMap < HeapKey , InsnId > , offset : i32 ) {
4872+ // This helper function is used to clear the cache for matching offsets
4873+ // while we don't have type based alias.
4874+ // TBAA will primarily modify any part of this optimization that
4875+ // currently uses this helper function.
4876+ fn flush_aliasing ( map : & mut HashMap < HeapKey , InsnId > , offset : i32 ) {
48644877 map. retain ( |HeapKey { object : _, offset : off } , _| * off != offset) ;
48654878 }
48664879
4880+ // Redundant load and store optimization
48674881 for block in self . rpo ( ) {
4868- // First, we eliminate loads and stores
48694882 let mut compile_time_heap: HashMap < HeapKey , InsnId > = HashMap :: new ( ) ;
48704883 let old_insns = std:: mem:: take ( & mut self . blocks [ block. 0 ] . insns ) ;
48714884 let mut new_insns = vec ! [ ] ;
@@ -4875,12 +4888,11 @@ impl Function {
48754888 Insn :: StoreField { recv, offset, val, .. } => {
48764889 let key = HeapKey { object : self . chase_insn ( recv) , offset } ;
48774890 let heap_entry = compile_time_heap. get ( & key) . copied ( ) ;
4878- // TODO(Jacob): Switch from actual to partial equality
48794891 if Some ( val) == heap_entry {
48804892 // If the value is already stored, short circuit and don't add an instruction to the block
48814893 continue
48824894 }
4883- remove_aliasing ( & mut compile_time_heap, offset) ;
4895+ flush_aliasing ( & mut compile_time_heap, offset) ;
48844896 compile_time_heap. insert ( key, val) ;
48854897 } ,
48864898 Insn :: LoadField { recv, offset, .. } => {
@@ -4889,7 +4901,6 @@ impl Function {
48894901 std:: collections:: hash_map:: Entry :: Occupied ( entry) => {
48904902 // If the value is stored already, we should short circuit.
48914903 // However, we need to replace insn_id with its representative in the SSA union.
4892- // TODO: Show an example of why we need this with p little ascii diagram or something
48934904 self . make_equal_to ( insn_id, * entry. get ( ) ) ;
48944905 continue
48954906 }
@@ -4905,7 +4916,7 @@ impl Function {
49054916 // But flags does not exist in our effects abstract heap modeling and we don't want to add special casing to effects.
49064917 // This special casing in this pass here should be removed once we refine our effects system to provide greater granularity for WriteBarrier.
49074918 let offset = RUBY_OFFSET_RBASIC_FLAGS ;
4908- remove_aliasing ( & mut compile_time_heap, offset) ;
4919+ flush_aliasing ( & mut compile_time_heap, offset) ;
49094920 } ,
49104921 insn => {
49114922 // If an instruction affects memory and we haven't modeled it, the compile_time_heap is invalidated
@@ -4917,25 +4928,12 @@ impl Function {
49174928 new_insns. push ( insn_id) ;
49184929 }
49194930
4920-
4921- // get the block
4922- // load-store optimization
4923- // reverse updated instructions
4924- // DSE
4925- // reverse final updated instructions
4926- // return
4927- //
4928- // get the block
4929- // load-store optimization, DSE* after (same iteration)
4930- // instead of eliding the store, mark the earlier store for removal
4931- // return
4932-
4933- // After performing scalar replacement on loads and stores where
4934- // applicable, we scan in reverse to find an eliminate dead stores
4931+ // Set up for dead store elimination
49354932 let mut reversed_insns = std:: mem:: take ( & mut new_insns) ;
49364933 reversed_insns. reverse ( ) ;
49374934 compile_time_heap. clear ( ) ;
49384935
4936+ // Dead store elimnination
49394937 for insn_id in reversed_insns {
49404938 match self . find ( insn_id) {
49414939 Insn :: StoreField { recv, offset, .. } => {
@@ -4945,21 +4943,21 @@ impl Function {
49454943 continue
49464944 }
49474945 else {
4948- remove_aliasing ( & mut compile_time_heap, offset) ;
4946+ flush_aliasing ( & mut compile_time_heap, offset) ;
49494947 compile_time_heap. insert ( key, insn_id) ;
49504948 }
49514949 }
49524950 Insn :: LoadField { offset, .. } => {
4953- remove_aliasing ( & mut compile_time_heap, offset) ;
4951+ flush_aliasing ( & mut compile_time_heap, offset) ;
49544952 }
49554953 Insn :: WriteBarrier { .. } => {
49564954 let offset = RUBY_OFFSET_RBASIC_FLAGS ;
4957- remove_aliasing ( & mut compile_time_heap, offset) ;
4955+ flush_aliasing ( & mut compile_time_heap, offset) ;
49584956 } ,
49594957 insn => {
49604958 let insn_reads_memory = insn. effects_of ( ) . includes ( Effect :: read ( abstract_heaps:: Memory ) ) ;
49614959 let insn_uses_control_flow = insn. effects_of ( ) . includes ( effects:: Control ) ;
4962- // TODO: Once we update from extended basic blocks to normal basic blocks, we can remove the control flow check
4960+ // TODO(Jacob) : Once we update from extended basic blocks to normal basic blocks, we can remove the control flow check
49634961 if insn_reads_memory || insn_uses_control_flow {
49644962 compile_time_heap. clear ( ) ;
49654963 }
0 commit comments