Skip to content

Commit de9e55a

Browse files
committed
Improve documentation
1 parent 90a9888 commit de9e55a

1 file changed

Lines changed: 40 additions & 42 deletions

File tree

zjit/src/hir.rs

Lines changed: 40 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -5073,36 +5073,49 @@ impl Function {
50735073
}
50745074
}
50755075

5076-
// TODO: Replace hashmaps with vecs?
5077-
// TODO: Add comments describing load store elimination in the first pass
5078-
// TODO: Link to rails at scale post?
5079-
// TODO: Add comments describing dead store elimination in the second pass
5080-
// TODO: Add master comment about type based alias analysis with specific callouts referencing the overall TBAA comment
5081-
// TODO(Jacob): Spend time with the old noggin to answer the following questions.
5082-
// 1. It is possible to do DSE forwards. Is it possible to do this at the same time as load store?
5083-
// If so, this would allow us to do it in one pass*. We still need to clean up the dead stores afterwards and convert them to
5084-
// NOPs or something. The more I think about it, the more I think it makes sense to have a cleanup pass at the end of all other
5085-
// analysis passes.
5086-
// 2. DSE and load-store-opt seem like duals of each other. If this is true, then they should be symmetric. Why does DSE only handle
5087-
// stores? Can I convince myself that we're not missing some potential dead loads? Is this caught at an SSA level already?
5076+
/// Load store optimization performs two functions:
5077+
/// 1. Elide duplicate LoadField and StoreField instructions, and replace
5078+
/// redundant loads or stores with their values when possible.
5079+
/// 2. Remove "dead stores". A store is dead when it is never used before
5080+
/// getting overwritten by a second store. If this pass proves no use,
5081+
/// the first store is elided.
5082+
///
5083+
/// These two optimizations use very similar abstract interpretations and are
5084+
/// currently combined into one pass. The primary difference is that load
5085+
/// store optimization runs forward (from top to bottom of a block) while
5086+
/// dead store elimination runs in reverse. A subtle secondary difference is
5087+
/// that our use of extended basic blocks induces an asymmetry between these
5088+
/// two optimizations. When scanning in reverse, dead store elimination needs
5089+
/// to consider the "multi-exit" nature of EBBs. Stores that appear to be
5090+
/// dead can sometimes have early exits that make the optimization unsound
5091+
/// if we don't handle this special case. Basic blocks avoid this problem.
5092+
///
5093+
/// Note for future improvements:
5094+
/// Load & store optimizations currently operate at a block level, but could
5095+
/// be expanded to operate on methods. This likely means the two passes
5096+
/// should be split into their own methods, since instead of "one forwards, one
5097+
/// backwards" we need to traverse dominator and post dominator trees and it
5098+
/// becomes tricky enough that splitting may be cleaner.
5099+
///
5100+
/// For further reading, see the following Rails at Scale blog post.
5101+
/// https://railsatscale.com/2026-03-18-how-zjit-removes-redundant-object-loads-and-stores/
50885102
fn optimize_load_store(&mut self) {
50895103
#[derive(PartialEq, Eq, Hash, Clone, Copy)]
50905104
struct HeapKey {
50915105
object: InsnId,
50925106
offset: i32,
50935107
}
50945108

5095-
// This helper function is used while we don't have type based alias
5096-
// analysis. Matching offsets could result in aliased objects that need
5097-
// to be clear.
5098-
// Any code location that uses the remove_aliasing function needs to be replaced
5099-
// when we implement TBAA
5100-
fn remove_aliasing(map: &mut HashMap<HeapKey, InsnId>, offset: i32) {
5109+
// This helper function is used to clear the cache for matching offsets
5110+
// while we don't have type based alias analysis.
5111+
// TBAA will primarily modify any part of this optimization that
5112+
// currently uses this helper function.
5113+
fn flush_aliasing(map: &mut HashMap<HeapKey, InsnId>, offset: i32) {
51015114
map.retain(|HeapKey { object: _, offset: off }, _| *off != offset);
51025115
}
51035116

5117+
// Redundant load and store optimization
51045118
for block in self.rpo() {
5105-
// First, we eliminate loads and stores
51065119
let mut compile_time_heap: HashMap<HeapKey, InsnId> = HashMap::new();
51075120
let old_insns = std::mem::take(&mut self.blocks[block.0].insns);
51085121
let mut new_insns = vec![];
@@ -5112,12 +5125,11 @@ impl Function {
51125125
Insn::StoreField { recv, offset, val, .. } => {
51135126
let key = HeapKey { object: self.chase_insn(recv), offset };
51145127
let heap_entry = compile_time_heap.get(&key).copied();
5115-
// TODO(Jacob): Switch from actual to partial equality
51165128
if Some(val) == heap_entry {
51175129
// If the value is already stored, short circuit and don't add an instruction to the block
51185130
continue
51195131
}
5120-
remove_aliasing(&mut compile_time_heap, offset);
5132+
flush_aliasing(&mut compile_time_heap, offset);
51215133
compile_time_heap.insert(key, val);
51225134
},
51235135
Insn::LoadField { recv, offset, .. } => {
@@ -5126,7 +5138,6 @@ impl Function {
51265138
std::collections::hash_map::Entry::Occupied(entry) => {
51275139
// If the value is stored already, we should short circuit.
51285140
// However, we need to replace insn_id with its representative in the SSA union.
5129-
// TODO: Show an example of why we need this with p little ascii diagram or something
51305141
self.make_equal_to(insn_id, *entry.get());
51315142
continue
51325143
}
@@ -5142,7 +5153,7 @@ impl Function {
51425153
// But flags does not exist in our effects abstract heap modeling and we don't want to add special casing to effects.
51435154
// This special casing in this pass here should be removed once we refine our effects system to provide greater granularity for WriteBarrier.
51445155
let offset = RUBY_OFFSET_RBASIC_FLAGS;
5145-
remove_aliasing(&mut compile_time_heap, offset);
5156+
flush_aliasing(&mut compile_time_heap, offset);
51465157
},
51475158
insn => {
51485159
// If an instruction affects memory and we haven't modeled it, the compile_time_heap is invalidated
@@ -5154,25 +5165,12 @@ impl Function {
51545165
new_insns.push(insn_id);
51555166
}
51565167

5157-
5158-
// get the block
5159-
// load-store optimization
5160-
// reverse updated instructions
5161-
// DSE
5162-
// reverse final updated instructions
5163-
// return
5164-
//
5165-
// get the block
5166-
// load-store optimization, DSE* after (same iteration)
5167-
// instead of eliding the store, mark the earlier store for removal
5168-
// return
5169-
5170-
// After performing scalar replacement on loads and stores where
5171-
// applicable, we scan in reverse to find and eliminate dead stores
5168+
// Set up for dead store elimination
51725169
let mut reversed_insns = std::mem::take(&mut new_insns);
51735170
reversed_insns.reverse();
51745171
compile_time_heap.clear();
51755172

5173+
// Dead store elimination
51765174
for insn_id in reversed_insns {
51775175
match self.find(insn_id) {
51785176
Insn::StoreField { recv, offset, .. } => {
@@ -5182,21 +5180,21 @@ impl Function {
51825180
continue
51835181
}
51845182
else {
5185-
remove_aliasing(&mut compile_time_heap, offset);
5183+
flush_aliasing(&mut compile_time_heap, offset);
51865184
compile_time_heap.insert(key, insn_id);
51875185
}
51885186
}
51895187
Insn::LoadField{ offset, .. } => {
5190-
remove_aliasing(&mut compile_time_heap, offset);
5188+
flush_aliasing(&mut compile_time_heap, offset);
51915189
}
51925190
Insn::WriteBarrier { .. } => {
51935191
let offset = RUBY_OFFSET_RBASIC_FLAGS;
5194-
remove_aliasing(&mut compile_time_heap, offset);
5192+
flush_aliasing(&mut compile_time_heap, offset);
51955193
},
51965194
insn => {
51975195
let insn_reads_memory = insn.effects_of().includes(Effect::read(abstract_heaps::Memory));
51985196
let insn_uses_control_flow = insn.effects_of().includes(effects::Control);
5199-
// TODO: Once we update from extended basic blocks to normal basic blocks, we can remove the control flow check
5197+
// TODO(Jacob): Once we update from extended basic blocks to normal basic blocks, we can remove the control flow check
52005198
if insn_reads_memory || insn_uses_control_flow {
52015199
compile_time_heap.clear();
52025200
}

0 commit comments

Comments
 (0)