Skip to content
43 changes: 40 additions & 3 deletions libwild/src/elf_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ fn write_file<'data, A: Arch<Platform = Elf>>(
write_object::<A>(s, buffers, table_writer, layout, trace, sym_index_map)?;
}
FileLayout::Prelude(s) => write_prelude::<A>(s, buffers, table_writer, layout)?,
FileLayout::Epilogue(s) => write_epilogue::<A>(s, buffers, table_writer, layout)?,
FileLayout::Epilogue(s) => write_epilogue::<A>(s, buffers, table_writer, layout, trace)?,
FileLayout::SyntheticSymbols(s) => write_synthetic_symbols::<A>(s, table_writer, layout)?,
FileLayout::LinkerScript(s) => write_linker_script_state::<A>(s, table_writer, layout)?,
FileLayout::NotLoaded => {}
Expand Down Expand Up @@ -1462,11 +1462,30 @@ fn write_object<'data, A: Arch<Platform = Elf>>(

let _span = debug_span!("write_file", filename = %object.input).entered();
let _file_span = layout.args().common().trace_span_for_file(object.file_id);

let Some(FileLayout::Epilogue(epilogue)) =
layout.group_layouts.last().and_then(|g| g.files.last())
else {
unreachable!("Epilogue is broken and must be the last file in the final layout group");
};

let mut is_harvested = vec![false; object.sections.len()];
for h in &epilogue.script_sorted_sections {
if h.file_id == object.file_id {
is_harvested[h.section_index.0] = true;
}
}

for (i, sec) in object.sections.iter().enumerate() {
let section_index = object::SectionIndex(i);

match sec {
SectionSlot::Loaded(sec) => {
// Skip if handled by Harvester
if is_harvested[i] {
continue;
}

write_object_section::<A>(
object,
layout,
Expand Down Expand Up @@ -3927,6 +3946,7 @@ fn write_epilogue<A: Arch<Platform = Elf>>(
buffers: &mut OutputSectionPartMap<&mut [u8]>,
table_writer: &mut TableWriter,
layout: &ElfLayout,
trace: &TraceOutput,
) -> Result {
verbose_timing_phase!("Write epilogue");

Expand Down Expand Up @@ -3964,15 +3984,32 @@ fn write_epilogue<A: Arch<Platform = Elf>>(
&epilogue_offsets,
)?;
}

// The actual build-id will be filled in later once all writing has completed. It's important
// that we fill it with zeros now, however, since if we're overwriting an existing file, there
// might be other data there, and if we don't zero it, the build ID will be hashing that data.
let build_id_buffer = buffers.get_mut(part_id::NOTE_GNU_BUILD_ID);
build_id_buffer.fill(0);

write_compressed_debug_sections(layout, buffers);
for harvested in &epilogue.script_sorted_sections {
let crate::layout::FileLayout::Object(object) = layout.file_layout(harvested.file_id)
else {
continue;
};

if let SectionSlot::Loaded(sec) = &object.sections[harvested.section_index.0] {
write_object_section::<A>(
object,
layout,
sec,
harvested.section_index,
buffers,
table_writer,
trace,
)?;
}
}

write_compressed_debug_sections(layout, buffers);
Ok(())
}

Expand Down
135 changes: 131 additions & 4 deletions libwild/src/layout.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ use crate::program_segments::ProgramSegments;
use crate::resolution;
use crate::resolution::NotLoaded;
use crate::resolution::ResolvedGroup;
use crate::resolution::ScriptSortedSectionDetail;
use crate::resolution::SectionSlot;
use crate::resolution::UnloadedSection;
use crate::sharding::ShardKey;
Expand Down Expand Up @@ -155,12 +156,18 @@ pub fn compute<'data, P: Platform, A: Arch<Platform = P>>(

let mut dynamic_symbol_definitions =
merge_dynamic_symbol_definitions(&group_states, &symbol_db)?;
let mut script_sorted_sections = harvest_and_sort_script_sections(
&mut group_states,
&output_sections,
&symbol_db.section_part_ids,
);

group_states.push(GroupState {
files: vec![FileLayoutState::Epilogue(EpilogueLayoutState::new(
symbol_db.args,
symbol_db.output_kind,
&mut dynamic_symbol_definitions,
script_sorted_sections.clone(),
))],
queue: LocalWorkQueue::new(epilogue_file_id.group()),
common: CommonGroupState::new(&output_sections),
Expand Down Expand Up @@ -290,14 +297,38 @@ pub fn compute<'data, P: Platform, A: Arch<Platform = P>>(
)?;

let mem_offsets: OutputSectionPartMap<u64> = starting_memory_offsets(&section_part_layouts);
let starting_mem_offsets_by_group = compute_start_offsets_by_group(&group_states, mem_offsets);
let starting_mem_offsets_by_group =
compute_start_offsets_by_group(&group_states, mem_offsets.clone());

let merged_string_start_addresses = MergedStringStartAddresses::compute(
&output_sections,
&starting_mem_offsets_by_group,
&merged_strings,
);

// --- SECTION SORTING & ADDRESS ASSIGNMENT ---

// At this stage, the sections harvested for script-requested sorting are already in their
// sorted order but lack concrete memory addresses. We finalise them in two steps:
// 1. Linearize them in memory, starting from the epilogue's base offset for their respective
//    output section part, aligning each section and then advancing by its size.
// 2. Re-sort the resulting registry by (file ID, section index) so that it can be
//    binary-searched when per-object section addresses are resolved.
let mut harvested_sections_registry = Vec::with_capacity(script_sorted_sections.len());
let mut epilogue_offsets = starting_mem_offsets_by_group.last().unwrap().clone();

for sec in &mut script_sorted_sections {
let offset = epilogue_offsets.get_mut(sec.part_id);
// Round the running offset up to the alignment recorded for this section.
*offset = sec.alignment.align_up(*offset);
sec.mem_offset = *offset;
*offset += sec.size;

// Record the placed section in the flat registry, which is sorted below for binary search.
harvested_sections_registry.push(sec.clone());
}

harvested_sections_registry.sort_unstable_by_key(|s| (s.file_id, s.section_index.0));

let mut symbol_resolutions = SymbolResolutions {
resolutions: Vec::with_capacity(symbol_db.num_symbols()),
};
Expand All @@ -316,6 +347,7 @@ pub fn compute<'data, P: Platform, A: Arch<Platform = P>>(
let resources = FinaliseLayoutResources {
symbol_db: &symbol_db,
output_sections: &output_sections,
harvested_sections_registry: &harvested_sections_registry,
output_order: &output_order,
section_layouts: &section_layouts,
merged_string_start_addresses: &merged_string_start_addresses,
Expand Down Expand Up @@ -756,6 +788,7 @@ pub(crate) struct SyntheticSymbolsLayoutState<'data, P: Platform> {

/// Layout-time state for the epilogue pseudo-file.
pub(crate) struct EpilogueLayoutState<P: Platform> {
/// Platform-specific epilogue state, produced by `P::new_epilogue_layout`.
format_specific: P::EpilogueLayoutExt,
/// Sections harvested for script-requested sorting. Their sizes are added to the epilogue's
/// extra sizes and their memory offsets are assigned when the epilogue layout is finalised.
pub(crate) script_sorted_sections: Vec<HarvestedSortedSection>,
}

#[derive(Debug)]
Expand All @@ -775,6 +808,7 @@ pub(crate) struct SyntheticSymbolsLayout<'data, P: Platform> {
/// Finalised layout of the epilogue pseudo-file.
pub(crate) struct EpilogueLayout<P: Platform> {
/// Platform-specific epilogue state carried over from `EpilogueLayoutState`.
pub(crate) format_specific: P::EpilogueLayoutExt,
// NOTE(review): presumably the index of the first dynamic symbol emitted by the epilogue -
// confirm against where this value is computed.
pub(crate) dynsym_start_index: u32,
/// Sections harvested for script-requested sorting, with `mem_offset` assigned. The ELF writer
/// emits these from the epilogue rather than from their owning object.
pub(crate) script_sorted_sections: Vec<HarvestedSortedSection>,
}

#[derive(Debug)]
Expand Down Expand Up @@ -1192,6 +1226,8 @@ pub(crate) struct ObjectLayoutState<'data, P: Platform> {
/// and later transferred to `ObjectLayout`.
section_relax_deltas: RelaxDeltaMap,

pub(crate) script_sorted_sections: Vec<ScriptSortedSectionDetail>,

/// Which ThunkBlock handles primary-part thunks for this object.
pub(crate) thunk_block_id: ThunkBlockId,

Expand Down Expand Up @@ -1319,6 +1355,7 @@ pub(crate) struct FinaliseLayoutResources<'scope, 'data, P: Platform> {
output_sections: &'scope OutputSections<'data, P>,
output_order: &'scope OutputOrder,
pub(crate) section_layouts: &'scope OutputSectionMap<OutputRecordLayout>,
pub(crate) harvested_sections_registry: &'scope [HarvestedSortedSection],
merged_string_start_addresses: &'scope MergedStringStartAddresses,
merged_strings: &'scope OutputSectionMap<MergedStringsSection<'data>>,
dynamic_symbol_definitions: &'scope Vec<DynamicSymbolDefinition<'data, P>>,
Expand Down Expand Up @@ -3579,9 +3616,11 @@ impl<'data, P: Platform> EpilogueLayoutState<P> {
args: &P::Args,
output_kind: OutputKind,
dynamic_symbol_definitions: &mut [DynamicSymbolDefinition<P>],
script_sorted_sections: Vec<HarvestedSortedSection>,
) -> Self {
EpilogueLayoutState {
format_specific: P::new_epilogue_layout(args, output_kind, dynamic_symbol_definitions),
script_sorted_sections,
}
}

Expand All @@ -3592,6 +3631,9 @@ impl<'data, P: Platform> EpilogueLayoutState<P> {
resources: &FinaliseSizesResources<'data, '_, P>,
) -> Result {
let mut extra_sizes = OutputSectionPartMap::with_size(common.mem_sizes.num_parts());
for sec in &self.script_sorted_sections {
extra_sizes.increment(sec.part_id, sec.size);
}
P::apply_late_size_adjustments_epilogue(
&mut self.format_specific,
total_sizes,
Expand Down Expand Up @@ -3645,10 +3687,16 @@ impl<'data, P: Platform> EpilogueLayoutState<P> {
dynsym_start_index,
resources.dynamic_symbol_definitions,
)?;

for sec in &mut self.script_sorted_sections {
let offset = memory_offsets.get_mut(sec.part_id);
*offset = sec.alignment.align_up(*offset);
sec.mem_offset = *offset;
*offset += sec.size;
}
Ok(EpilogueLayout {
format_specific: self.format_specific,
dynsym_start_index,
script_sorted_sections: self.script_sorted_sections,
})
}
}
Expand Down Expand Up @@ -3678,6 +3726,7 @@ fn new_object_layout_state<P: Platform>(
relocations: input_state.relocations,
format_specific: Default::default(),
section_relax_deltas: RelaxDeltaMap::new(),
script_sorted_sections: input_state.script_sorted_sections,
thunk_block_id: ThunkBlockId::default(),
owns_thunk_block: false,
post_gc_primary_bytes: 0,
Expand Down Expand Up @@ -3977,22 +4026,36 @@ impl<'data, P: Platform> ObjectLayoutState<'data, P> {
let section_id_range = self.section_id_range;
let object_part_ids = &resources.symbol_db.section_part_ids[section_id_range.as_usize()];

for (slot, &part_id) in self.sections.iter_mut().zip(object_part_ids) {
for (sec_idx, (slot, &part_id)) in self.sections.iter_mut().zip(object_part_ids).enumerate()
{
let resolution = match slot {
SectionSlot::Loaded(sec) => {
let address = *memory_offsets.get(part_id);
let mut address = *memory_offsets.get(part_id);

// TODO: We probably need to be able to handle sections that are ifuncs and
// sections that need a TLS GOT struct.
*memory_offsets.get_mut(part_id) +=
sec.capacity(part_id, resources.output_sections);

// Collect SFrame section ranges while we're already iterating
if part_id.output_section_id() == output_section_id::SFRAME {
let offset = (address - sframe_start_address) as usize;
let len = sec.size as usize;
sframe_ranges.push(offset..offset + len);
}

if let Ok(idx) = resources
.harvested_sections_registry
.binary_search_by_key(&(self.file_id, sec_idx), |s| {
(s.file_id, s.section_index.0)
})
{
address = resources.harvested_sections_registry[idx].mem_offset;
}

SectionResolution { address }
}

&mut SectionSlot::LoadedDebugInfo(sec) => {
let address = *memory_offsets.get(part_id);
*memory_offsets.get_mut(part_id) +=
Expand Down Expand Up @@ -5408,3 +5471,67 @@ impl OutputRecordLayout {
// NOTE(review): this `Drop` implementation has an empty body, so it performs no cleanup of its
// own. Implementing `Drop` at all does prevent fields from being moved out of a `Layout`; whether
// that is the intent here is not visible from this code - TODO confirm.
impl<'data, P: Platform> Drop for Layout<'data, P> {
fn drop(&mut self) {}
}

/// An input section that a linker script requested be sorted, pulled out of its owning object so
/// that it can be placed and written in sorted order.
#[derive(Clone, Debug)]
pub(crate) struct HarvestedSortedSection {
/// The input file that the section came from.
pub(crate) file_id: FileId,
/// Index of the section within the owning object's section list.
pub(crate) section_index: object::SectionIndex,
/// The output section part that the section is placed in.
pub(crate) part_id: PartId,
/// Number of bytes reserved for the section (its capacity in `part_id`).
pub(crate) size: u64,
/// Alignment that `mem_offset` is rounded up to; taken from `part_id`.
pub(crate) alignment: Alignment,
/// Memory offset assigned to the section. Initialised to 0 when harvested and filled in once
/// offsets are assigned during layout.
pub(crate) mem_offset: u64,
}

/// Collects every loaded section that an input object listed in its `script_sorted_sections` and
/// returns them ordered by section name.
///
/// Only `SectionSlot::Loaded` sections are harvested; requests that refer to any other kind of
/// slot are ignored. Each entry records the owning file, the section index, the output part, the
/// section's capacity in that part and the part's alignment. `mem_offset` is left at 0 for the
/// caller to fill in later.
///
/// The sort is stable, so sections with equal names keep the order in which they were encountered
/// (group order, then file order, then request order). A section whose name cannot be read sorts
/// as if its name were empty.
fn harvest_and_sort_script_sections<'data, P: Platform>(
    group_states: &mut [GroupState<'data, P>],
    output_sections: &OutputSections<P>,
    section_part_ids: &[PartId],
) -> Vec<HarvestedSortedSection> {
    timing_phase!("Harvest and sort script sections");

    // Pairs of (section name, harvested section); the name is only needed as the sort key.
    let mut named_sections = Vec::new();

    for group in group_states.iter() {
        for file in &group.files {
            let FileLayoutState::Object(obj) = file else {
                continue;
            };

            for request in &obj.script_sorted_sections {
                let SectionSlot::Loaded(loaded) = &obj.sections[request.index.0] else {
                    continue;
                };

                let part_id = obj.section_part_id(request.index, section_part_ids);
                let size = loaded.capacity(part_id, output_sections);
                let name = obj.object.section_name(request.index).unwrap_or_default();

                named_sections.push((
                    name,
                    HarvestedSortedSection {
                        file_id: obj.file_id,
                        section_index: request.index,
                        part_id,
                        size,
                        alignment: part_id.alignment(output_sections),
                        mem_offset: 0,
                    },
                ));
            }
        }
    }

    // Stable sort: ties on name retain harvest order.
    named_sections.sort_by_key(|entry| entry.0);

    named_sections
        .into_iter()
        .map(|(_, section)| section)
        .collect()
}
Loading