Skip to content

Commit a61a757

Browse files
committed
[idb_import] Type string data with their character width
Use the string type that idb-rs now exposes to size string data variables by their real character width: a 1-byte string stays a char array, while UTF-16/UTF-32 strings become wide-character arrays (with the element count equal to the character count) instead of being mistyped as byte arrays.
1 parent f0142e4 commit a61a757

2 files changed

Lines changed: 37 additions & 5 deletions

File tree

plugins/idb_import/src/mapper.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -684,7 +684,17 @@ impl IDBMapper {
684684
match kind {
685685
DataKind::Int(bytes) => Type::int(bytes as usize, false),
686686
DataKind::Float(bytes) => Type::float(bytes as usize),
687-
DataKind::String(len) => Type::array(&Type::char(), len),
687+
DataKind::String { len, char_width } => {
688+
// Use a wide character for UTF-16/UTF-32 so the string renders correctly
689+
// instead of as a byte array; the element count is the character count.
690+
let element = if char_width <= 1 {
691+
Type::char()
692+
} else {
693+
Type::wide_char(char_width as usize)
694+
};
695+
let count = len / char_width.max(1) as u64;
696+
Type::array(&element, count)
697+
}
688698
}
689699
} else {
690700
return;

plugins/idb_import/src/parse.rs

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,9 @@ pub enum DataKind {
9292
Int(u8),
9393
/// Floating point value of the given size in bytes.
9494
Float(u8),
95-
/// String literal of the given total length in bytes.
96-
String(u64),
95+
/// String literal of the given total length in bytes, with the character width in bytes
96+
/// (1 for C/UTF-8, 2 for UTF-16, 4 for UTF-32).
97+
String { len: u64, char_width: u8 },
9798
}
9899

99100
/// The per-operand number formats IDA recorded for an instruction.
@@ -468,14 +469,25 @@ impl IDBFileParser {
468469
ByteDataType::Float => Some(DataKind::Float(4)),
469470
ByteDataType::Double => Some(DataKind::Float(8)),
470471
ByteDataType::Tbyte => Some(DataKind::Float(10)),
471-
ByteDataType::Strlit => Some(DataKind::String(size as u64)),
472+
ByteDataType::Strlit => {
473+
// Recover the character width so wide (UTF-16/32) strings are not mistyped as
474+
// single-byte. Defaults to one byte when no explicit string type is recorded.
475+
let char_width = AddressInfo::new(id0, id1, id2, netdelta, address)
476+
.and_then(|info| info.str_type())
477+
.map(|str_type| str_char_width(str_type.width))
478+
.unwrap_or(1);
479+
Some(DataKind::String {
480+
len: size as u64,
481+
char_width,
482+
})
483+
}
472484
// Structs carry their actual type in the TIL; resolve it below. Alignment fill
473485
// and vector/custom kinds have no simple mapping and are skipped.
474486
_ => None,
475487
};
476488

477489
// A struct item only makes sense with its real type; look it up. Avoid the per-item
478-
// type lookup for the (vastly more common) scalar/string items.
490+
// type lookup for the (vastly more common) scalar items.
479491
let ty = if matches!(data.data_type(), ByteDataType::Struct) {
480492
AddressInfo::new(id0, id1, id2, netdelta, address)
481493
.and_then(|info| info.tinfo(&root_info).ok().flatten())
@@ -816,6 +828,16 @@ impl IDBFileParser {
816828
}
817829
}
818830

831+
/// Size in bytes of a single character for the given IDA string width (1, 2 or 4).
832+
fn str_char_width(width: idb_rs::addr_info::StrWidth) -> u8 {
833+
use idb_rs::addr_info::StrWidth;
834+
match width {
835+
StrWidth::Byte => 1,
836+
StrWidth::Word => 2,
837+
StrWidth::Dword => 4,
838+
}
839+
}
840+
819841
/// Map an IDA operand representation to the subset of number formats we can apply directly.
820842
///
821843
/// Enum, segment, stack-variable, struct-offset, forced and custom representations need extra

0 commit comments

Comments
 (0)