Skip to content

Commit 298d15a

Browse files
committed
Improve documentation
1 parent e90e33a commit 298d15a

1 file changed

Lines changed: 40 additions & 42 deletions

File tree

zjit/src/hir.rs

Lines changed: 40 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -4836,36 +4836,49 @@ impl Function {
48364836
}
48374837
}
48384838

4839-
// TODO: Replace hashmaps with vecs?
4840-
// TODO: Add comments describing load store elimination in the first pass
4841-
// TODO: Link to rails at scale post?
4842-
// TODO: Add comments describing deada store elimination in the second pass
4843-
// TODO: Add master comment about type based alias analysis with specific callouts referencing the overall TBAA comment
4844-
// TODO(Jacob): Spend time with the old noggin to answer the following questions.
4845-
// 1. It is possible to do DSE forwards. Is it possible to do this at the same time as load store?
4846-
// If so, this would allow us to do it in one pass*. We still need to clean up the dead stores afterwards and convert them to
4847-
// NOPs or something. The more I think about it, the more I think it makes sense to have a cleanup pass at the end of all other
4848-
// analysis passes.
4849-
// 2. DSE and load-store-opt seem like duals of each other. If this is true, then they should be symmetric. Why does DSE only handle
4850-
// stores? Can I convince myself that we're not missing some potential dead loads? Is this caught at an SSA level already?
4839+
/// Load store optimization performs two functions:
4840+
/// 1. Elide duplicate LoadField and StoreField instructions, and replace
4841+
/// redundant loads or stores with their values when possible.
4842+
/// 2. Remove "dead stores". A store is dead when it is never used before
4843+
/// getting overwritten by a second store. If this pass proves no use,
4844+
/// the first store is elided.
4845+
///
4846+
/// These two optimizations use very similar abstract interpretations and are
4847+
/// currently combined into one pass. The primary difference is that load
4848+
/// store optimization runs forward (from top to bottom of a block) while
4849+
/// dead store elimination runs in reverse. A subtle secondary difference is
4850+
/// that our use of extended basic blocks induces an asymmetry between these
4851+
/// two optimizations. When scanning in reverse, dead store elimination needs
4852+
/// to consider the "multi-exit" nature of EBBs. Stores that appear to be
4853+
/// dead can sometimes have early exits that make the optimization unsound
4854+
/// if we don't handle this special case. Basic blocks avoid this problem.
4855+
///
4856+
/// Note for future improvements:
4857+
/// Load & store optimizations currently operate at a block level, but could
4858+
/// be expanded to operate on methods. This likely means the two passes
4859+
/// should be split into their own methods as instead of "one forwards, one
4860+
/// backwards" we need to traverse dominator and post dominator trees and it
4861+
/// becomes tricky enough that splitting may be cleaner.
4862+
///
4863+
/// For further reading, see the following Rails at Scale blog post.
4864+
/// https://railsatscale.com/2026-03-18-how-zjit-removes-redundant-object-loads-and-stores/
48514865
fn optimize_load_store(&mut self) {
48524866
#[derive(PartialEq, Eq, Hash, Clone, Copy)]
48534867
struct HeapKey {
48544868
object: InsnId,
48554869
offset: i32,
48564870
}
48574871

4858-
// This helper function is used while we don't have type based alias
4859-
// analysis. Matching offsets could result in aliased objects that need
4860-
// to be clear.
4861-
// Any code location that uses the remove_aliasing function needs to be replaced
4862-
// when we implement TBAA
4863-
fn remove_aliasing(map: &mut HashMap<HeapKey, InsnId>, offset: i32) {
4872+
// This helper function is used to clear the cache for matching offsets
4873+
// while we don't have type based alias analysis.
4874+
// TBAA will primarily modify any part of this optimization that
4875+
// currently uses this helper function.
4876+
fn flush_aliasing(map: &mut HashMap<HeapKey, InsnId>, offset: i32) {
48644877
map.retain(|HeapKey { object: _, offset: off }, _| *off != offset);
48654878
}
48664879

4880+
// Redundant load and store optimization
48674881
for block in self.rpo() {
4868-
// First, we eliminate loads and stores
48694882
let mut compile_time_heap: HashMap<HeapKey, InsnId> = HashMap::new();
48704883
let old_insns = std::mem::take(&mut self.blocks[block.0].insns);
48714884
let mut new_insns = vec![];
@@ -4875,12 +4888,11 @@ impl Function {
48754888
Insn::StoreField { recv, offset, val, .. } => {
48764889
let key = HeapKey { object: self.chase_insn(recv), offset };
48774890
let heap_entry = compile_time_heap.get(&key).copied();
4878-
// TODO(Jacob): Switch from actual to partial equality
48794891
if Some(val) == heap_entry {
48804892
// If the value is already stored, short circuit and don't add an instruction to the block
48814893
continue
48824894
}
4883-
remove_aliasing(&mut compile_time_heap, offset);
4895+
flush_aliasing(&mut compile_time_heap, offset);
48844896
compile_time_heap.insert(key, val);
48854897
},
48864898
Insn::LoadField { recv, offset, .. } => {
@@ -4889,7 +4901,6 @@ impl Function {
48894901
std::collections::hash_map::Entry::Occupied(entry) => {
48904902
// If the value is stored already, we should short circuit.
48914903
// However, we need to replace insn_id with its representative in the SSA union.
4892-
// TODO: Show an example of why we need this with p little ascii diagram or something
48934904
self.make_equal_to(insn_id, *entry.get());
48944905
continue
48954906
}
@@ -4905,7 +4916,7 @@ impl Function {
49054916
// But flags does not exist in our effects abstract heap modeling and we don't want to add special casing to effects.
49064917
// This special casing in this pass here should be removed once we refine our effects system to provide greater granularity for WriteBarrier.
49074918
let offset = RUBY_OFFSET_RBASIC_FLAGS;
4908-
remove_aliasing(&mut compile_time_heap, offset);
4919+
flush_aliasing(&mut compile_time_heap, offset);
49094920
},
49104921
insn => {
49114922
// If an instruction affects memory and we haven't modeled it, the compile_time_heap is invalidated
@@ -4917,25 +4928,12 @@ impl Function {
49174928
new_insns.push(insn_id);
49184929
}
49194930

4920-
4921-
// get the block
4922-
// load-store optimization
4923-
// reverse updated instructions
4924-
// DSE
4925-
// reverse final updated instructions
4926-
// return
4927-
//
4928-
// get the block
4929-
// load-store optimization, DSE* after (same iteration)
4930-
// instead of eliding the store, mark the earlier store for removal
4931-
// return
4932-
4933-
// After performing scalar replacement on loads and stores where
4934-
// applicable, we scan in reverse to find an eliminate dead stores
4931+
// Set up for dead store elimination
49354932
let mut reversed_insns = std::mem::take(&mut new_insns);
49364933
reversed_insns.reverse();
49374934
compile_time_heap.clear();
49384935

4936+
// Dead store elimination
49394937
for insn_id in reversed_insns {
49404938
match self.find(insn_id) {
49414939
Insn::StoreField { recv, offset, .. } => {
@@ -4945,21 +4943,21 @@ impl Function {
49454943
continue
49464944
}
49474945
else {
4948-
remove_aliasing(&mut compile_time_heap, offset);
4946+
flush_aliasing(&mut compile_time_heap, offset);
49494947
compile_time_heap.insert(key, insn_id);
49504948
}
49514949
}
49524950
Insn::LoadField{ offset, .. } => {
4953-
remove_aliasing(&mut compile_time_heap, offset);
4951+
flush_aliasing(&mut compile_time_heap, offset);
49544952
}
49554953
Insn::WriteBarrier { .. } => {
49564954
let offset = RUBY_OFFSET_RBASIC_FLAGS;
4957-
remove_aliasing(&mut compile_time_heap, offset);
4955+
flush_aliasing(&mut compile_time_heap, offset);
49584956
},
49594957
insn => {
49604958
let insn_reads_memory = insn.effects_of().includes(Effect::read(abstract_heaps::Memory));
49614959
let insn_uses_control_flow = insn.effects_of().includes(effects::Control);
4962-
// TODO: Once we update from extended basic blocks to normal basic blocks, we can remove the control flow check
4960+
// TODO(Jacob): Once we update from extended basic blocks to normal basic blocks, we can remove the control flow check
49634961
if insn_reads_memory || insn_uses_control_flow {
49644962
compile_time_heap.clear();
49654963
}

0 commit comments

Comments
 (0)