Skip to content

Commit ced97c8

Browse files
committed
[idb_import] Apply operand display overrides after functions exist
Per-operand enum displays and number formats are applied with `set_int_display_type`, which needs the function containing the instruction. The IDB import runs as an early analysis activity, before functions are created, so `functions_containing()` returned an empty list and every override was silently dropped.

Stash the rebased overrides in a per-view registry during the import and apply them from a `BinaryViewInitialAnalysisCompletionEvent` handler once functions exist, then request re-analysis so they render.

Two further fixes make the overrides take effect:

- Key the override by Binary Ninja's operand index, defined as the number of operand-separator tokens before the token in the rendered instruction, not IDA's operand number (e.g. the immediate of `orr w1, w8, #imm` is operand 2, while IDA records it as operand 1). Both the enum and number-format passes now count operand separators.
- Apply enum displays before number formats and skip the format pass for addresses that carry an enum operand. IDA shows the enumeration even when the operand also has a number-format flag, so the format must not overwrite the enum override at the same operand.
1 parent 53a8d79 commit ced97c8

2 files changed

Lines changed: 240 additions & 111 deletions

File tree

plugins/idb_import/src/lib.rs

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,29 @@
11
use crate::mapper::IDBMapper;
22
use crate::parse::IDBFileParser;
33
use crate::settings::LoadSettings;
4-
use binaryninja::binary_view::AnalysisContext;
4+
use binaryninja::binary_view::{
5+
register_binary_view_event, AnalysisContext, BinaryView, BinaryViewEventHandler,
6+
BinaryViewEventType,
7+
};
58
use binaryninja::workflow::{activity, Activity, Workflow};
69
use std::fs::File;
710
use std::io::BufReader;
811

12+
/// Applies the per-operand display overrides (number formats and enum displays) that the import
13+
/// deferred because they require the view's functions to exist. By the time initial analysis
14+
/// completes the functions are present, so the overrides can be set and a re-analysis requested to
15+
/// render them.
16+
struct OperandDisplayApplier;
17+
18+
impl BinaryViewEventHandler for OperandDisplayApplier {
19+
fn on_event(&self, view: &BinaryView) {
20+
if crate::mapper::apply_pending_operand_display(view) {
21+
// The overrides only affect rendering once analysis re-runs over the functions.
22+
view.update_analysis();
23+
}
24+
}
25+
}
26+
927
mod commands;
1028
pub mod mapper;
1129
pub mod parse;
@@ -24,6 +42,12 @@ fn plugin_init() -> Result<(), ()> {
2442
// Register settings globally.
2543
LoadSettings::register();
2644

45+
// Apply deferred per-operand display overrides once functions exist.
46+
register_binary_view_event(
47+
BinaryViewEventType::BinaryViewInitialAnalysisCompletionEvent,
48+
OperandDisplayApplier,
49+
);
50+
2751
let loader_activity = |ctx: &AnalysisContext| {
2852
let view = ctx.view();
2953
let load_settings = LoadSettings::from_view_settings(&view);

plugins/idb_import/src/mapper.rs

Lines changed: 215 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,185 @@ use idb_rs::id0::SegmentType;
2222
use idb_rs::til::TypeVariant;
2323
use sha2::{Digest, Sha256};
2424
use std::collections::{HashMap, HashSet};
25+
use std::sync::{Mutex, OnceLock};
26+
27+
/// Per-operand display overrides (number formats and enum displays) that have been parsed and
/// rebased but cannot be applied yet because they require the view's functions to exist.
///
/// `set_int_display_type` needs the [`Function`] containing an instruction, but the IDB import runs
/// as an early analysis activity, before functions are created. We stash the rebased overrides here
/// keyed by the view, then the analysis-completion handler registered in `lib.rs` applies them once
/// functions are available.
pub struct PendingOperandDisplay {
    /// Number-format overrides; as stashed by the import, their addresses are already rebased.
    pub operand_formats: Vec<OperandFormatInfo>,
    /// Enum-display overrides; as stashed by the import, their addresses are already rebased.
    pub operand_enums: Vec<OperandEnumInfo>,
}
38+
39+
fn pending_operand_display() -> &'static Mutex<HashMap<usize, PendingOperandDisplay>> {
40+
static REGISTRY: OnceLock<Mutex<HashMap<usize, PendingOperandDisplay>>> = OnceLock::new();
41+
REGISTRY.get_or_init(|| Mutex::new(HashMap::new()))
42+
}
43+
44+
/// A stable per-view key shared between the import activity and the completion handler. The
45+
/// completion event wraps the same underlying `BNBinaryView`, so its handle pointer matches.
46+
fn view_key(view: &BinaryView) -> usize {
47+
view.handle as usize
48+
}
49+
50+
/// Stash rebased operand-display overrides to be applied once the view's functions exist.
51+
fn stash_operand_display(view: &BinaryView, pending: PendingOperandDisplay) {
52+
if let Ok(mut registry) = pending_operand_display().lock() {
53+
registry.insert(view_key(view), pending);
54+
}
55+
}
56+
57+
/// Apply (and remove) any operand-display overrides stashed for this view.
58+
///
59+
/// Enum displays (a handful) are applied first and rendered promptly; the much larger number-format
60+
/// set is applied afterwards. Each phase requests its own re-analysis so the overrides become
61+
/// visible. Returns whether any overrides were applied.
62+
pub fn apply_pending_operand_display(view: &BinaryView) -> bool {
63+
let key = view_key(view);
64+
let Some(pending) = pending_operand_display()
65+
.lock()
66+
.ok()
67+
.and_then(|mut registry| registry.remove(&key))
68+
else {
69+
tracing::debug!("No pending operand display overrides for view {key:#x}");
70+
return false;
71+
};
72+
73+
tracing::info!(
74+
"Applying deferred operand display: {} enums, {} number formats",
75+
pending.operand_enums.len(),
76+
pending.operand_formats.len()
77+
);
78+
79+
// Addresses whose operand is displayed as an enumeration. IDA shows the enum even when the
80+
// operand also carries a number-format flag, so the enum must win: skip the format pass for
81+
// these addresses, otherwise it would overwrite the enum override at the same operand.
82+
let enum_addresses: HashSet<u64> =
83+
pending.operand_enums.iter().map(|e| e.address).collect();
84+
85+
if !pending.operand_enums.is_empty() {
86+
for operand_enum in &pending.operand_enums {
87+
apply_operand_enum_display(view, operand_enum);
88+
}
89+
tracing::info!("Applied {} enum-displayed operands", pending.operand_enums.len());
90+
view.update_analysis();
91+
}
92+
93+
if !pending.operand_formats.is_empty() {
94+
let mut applied_formats = 0usize;
95+
for operand_format in &pending.operand_formats {
96+
if enum_addresses.contains(&operand_format.address) {
97+
continue;
98+
}
99+
apply_operand_format_display(view, operand_format);
100+
applied_formats += 1;
101+
}
102+
tracing::info!("Applied {applied_formats} operand number formats");
103+
view.update_analysis();
104+
}
105+
106+
!pending.operand_enums.is_empty() || !pending.operand_formats.is_empty()
107+
}
108+
109+
/// Display an operand against its enumeration, as IDA does.
110+
///
111+
/// Resolves the enumeration's Binary Ninja type id and sets it on each immediate token in the
112+
/// instruction. The override is keyed by Binary Ninja's operand index, which `set_int_display_type`
113+
/// defines as the number of operand-separator tokens preceding the token in the rendered line (not
114+
/// IDA's operand number). Counting separators here makes the override match what Binary Ninja
115+
/// renders, so the enumeration member is displayed once analysis re-renders the function.
116+
fn apply_operand_enum_display(view: &BinaryView, operand_enum: &OperandEnumInfo) {
117+
let Some(type_id) = view.type_id_by_name(operand_enum.enum_name.as_str()) else {
118+
tracing::debug!(
119+
"No Binary Ninja type for enum '{}', skipping operand at {:0x}",
120+
operand_enum.enum_name,
121+
operand_enum.address
122+
);
123+
return;
124+
};
125+
126+
let functions = view.functions_containing(operand_enum.address);
127+
let Some(func) = functions.iter().next() else {
128+
tracing::info!(
129+
"apply_operand_enum_display: no function containing {:#x} for enum '{}'",
130+
operand_enum.address,
131+
operand_enum.enum_name
132+
);
133+
return;
134+
};
135+
let arch = func.arch();
136+
137+
let bytes = view.read_vec(operand_enum.address, 16);
138+
if bytes.is_empty() {
139+
return;
140+
}
141+
let Some((_consumed, tokens)) = arch.instruction_text(&bytes, operand_enum.address) else {
142+
return;
143+
};
144+
145+
let mut separators = 0usize;
146+
for token in &tokens {
147+
if matches!(token.kind, InstructionTextTokenKind::OperandSeparator) {
148+
separators += 1;
149+
}
150+
if let Some(value) = integer_token_value(&token.kind) {
151+
func.set_int_display_type(
152+
operand_enum.address,
153+
value,
154+
separators,
155+
IntegerDisplayType::EnumerationDisplayType,
156+
Some(arch),
157+
Some(type_id.as_str()),
158+
);
159+
}
160+
}
161+
}
162+
163+
/// Apply IDA's per-operand number formats to the instruction at an address.
164+
///
165+
/// Like the enum pass, the override is keyed by Binary Ninja's operand index (the count of
166+
/// operand-separator tokens before the token), not IDA's operand number. Each recovered format is
167+
/// applied to every immediate token at its rendered operand index; instructions carrying a format
168+
/// almost always have a single immediate, so this matches what IDA formatted.
169+
fn apply_operand_format_display(view: &BinaryView, operand_format: &OperandFormatInfo) {
170+
let functions = view.functions_containing(operand_format.address);
171+
let Some(func) = functions.iter().next() else {
172+
return;
173+
};
174+
let arch = func.arch();
175+
176+
// The longest instruction we may encounter; reading a few extra bytes is harmless.
177+
let bytes = view.read_vec(operand_format.address, 16);
178+
if bytes.is_empty() {
179+
return;
180+
}
181+
let Some((_consumed, tokens)) = arch.instruction_text(&bytes, operand_format.address) else {
182+
return;
183+
};
184+
185+
let mut separators = 0usize;
186+
for token in &tokens {
187+
if matches!(token.kind, InstructionTextTokenKind::OperandSeparator) {
188+
separators += 1;
189+
}
190+
if let Some(value) = integer_token_value(&token.kind) {
191+
for (_operand_index, format) in &operand_format.formats {
192+
func.set_int_display_type(
193+
operand_format.address,
194+
value,
195+
separators,
196+
integer_display_type(*format),
197+
Some(arch),
198+
None,
199+
);
200+
}
201+
}
202+
}
203+
}
25204

26205
/// Maps IDB data into a [`BinaryView`].
27206
///
@@ -216,122 +395,48 @@ impl IDBMapper {
216395
}
217396
}
218397

219-
// Apply per-operand number formats to the disassembly, if the user opted in. This is
220-
// gated because it disassembles each formatted instruction.
221-
if self.apply_operand_formats && !self.info.operand_formats.is_empty() {
222-
tracing::info!(
223-
"Applying {} operand formats",
224-
self.info.operand_formats.len()
225-
);
226-
for operand_format in &self.info.operand_formats {
227-
let mut rebased = operand_format.clone();
228-
rebased.address = rebase(operand_format.address);
229-
self.map_operand_format_to_view(view, &rebased);
230-
}
231-
}
232-
233-
// Apply enum-displayed operands (same gating, since it also disassembles each operand).
234-
if self.apply_operand_formats && !self.info.operand_enums.is_empty() {
398+
// Per-operand number formats and enum displays need the containing function to exist, but
399+
// this import runs before functions are created. Rebase and stash them; the
400+
// analysis-completion handler in `lib.rs` applies them once functions are available. Gated
401+
// because applying them disassembles each affected instruction.
402+
if self.apply_operand_formats
403+
&& (!self.info.operand_formats.is_empty() || !self.info.operand_enums.is_empty())
404+
{
235405
tracing::info!(
236-
"Applying {} enum-displayed operands",
406+
"Deferring {} operand formats and {} enum-displayed operands until analysis completes",
407+
self.info.operand_formats.len(),
237408
self.info.operand_enums.len()
238409
);
239-
for operand_enum in &self.info.operand_enums {
240-
let mut rebased = operand_enum.clone();
241-
rebased.address = rebase(operand_enum.address);
242-
self.map_operand_enum_to_view(view, &rebased);
243-
}
244-
}
245-
246-
// self.map_used_types_to_view(view, &til_translator);
247-
}
248-
249-
/// Display an operand against its enumeration, as IDA does.
250-
///
251-
/// Resolves the enumeration's Binary Ninja type id and sets it on the operand for every
252-
/// immediate value in the instruction; like the number-format pass, an entry only takes
253-
/// effect for the exact (value, operand) Binary Ninja renders.
254-
fn map_operand_enum_to_view(&self, view: &BinaryView, operand_enum: &OperandEnumInfo) {
255-
let Some(type_id) = view.type_id_by_name(operand_enum.enum_name.as_str()) else {
256-
tracing::debug!(
257-
"No Binary Ninja type for enum '{}', skipping operand at {:0x}",
258-
operand_enum.enum_name,
259-
operand_enum.address
410+
let operand_formats = self
411+
.info
412+
.operand_formats
413+
.iter()
414+
.map(|operand_format| {
415+
let mut rebased = operand_format.clone();
416+
rebased.address = rebase(operand_format.address);
417+
rebased
418+
})
419+
.collect();
420+
let operand_enums = self
421+
.info
422+
.operand_enums
423+
.iter()
424+
.map(|operand_enum| {
425+
let mut rebased = operand_enum.clone();
426+
rebased.address = rebase(operand_enum.address);
427+
rebased
428+
})
429+
.collect();
430+
stash_operand_display(
431+
view,
432+
PendingOperandDisplay {
433+
operand_formats,
434+
operand_enums,
435+
},
260436
);
261-
return;
262-
};
263-
264-
let functions = view.functions_containing(operand_enum.address);
265-
let Some(func) = functions.iter().next() else {
266-
return;
267-
};
268-
let arch = func.arch();
269-
270-
let bytes = view.read_vec(operand_enum.address, 16);
271-
if bytes.is_empty() {
272-
return;
273-
}
274-
let Some((_consumed, tokens)) = arch.instruction_text(&bytes, operand_enum.address) else {
275-
return;
276-
};
277-
278-
for token in &tokens {
279-
if let Some(value) = integer_token_value(&token.kind) {
280-
func.set_int_display_type(
281-
operand_enum.address,
282-
value,
283-
operand_enum.operand as usize,
284-
IntegerDisplayType::EnumerationDisplayType,
285-
Some(arch),
286-
Some(type_id.as_str()),
287-
);
288-
}
289437
}
290-
}
291438

292-
/// Apply IDA's per-operand number formats to the instruction at an address.
293-
///
294-
/// `set_int_display_type` only takes effect when Binary Ninja renders a token with the exact
295-
/// (value, operand) we set, so disassembling the instruction to recover its immediate values
296-
/// and setting each value under every formatted operand index is safe: combinations that do
297-
/// not occur are simply never matched.
298-
fn map_operand_format_to_view(&self, view: &BinaryView, operand_format: &OperandFormatInfo) {
299-
let functions = view.functions_containing(operand_format.address);
300-
let Some(func) = functions.iter().next() else {
301-
return;
302-
};
303-
let arch = func.arch();
304-
305-
// The longest instruction we may encounter; reading a few extra bytes is harmless.
306-
let bytes = view.read_vec(operand_format.address, 16);
307-
if bytes.is_empty() {
308-
return;
309-
}
310-
let Some((_consumed, tokens)) = arch.instruction_text(&bytes, operand_format.address) else {
311-
return;
312-
};
313-
314-
let values: Vec<u64> = tokens
315-
.iter()
316-
.filter_map(|token| integer_token_value(&token.kind))
317-
.collect();
318-
if values.is_empty() {
319-
return;
320-
}
321-
322-
for (operand_index, format) in &operand_format.formats {
323-
let display_type = integer_display_type(*format);
324-
for value in &values {
325-
func.set_int_display_type(
326-
operand_format.address,
327-
*value,
328-
*operand_index as usize,
329-
display_type,
330-
Some(arch),
331-
None,
332-
);
333-
}
334-
}
439+
// self.map_used_types_to_view(view, &til_translator);
335440
}
336441

337442
pub fn map_types_to_view(

0 commit comments

Comments
 (0)