@@ -92,8 +92,9 @@ pub enum DataKind {
9292 Int ( u8 ) ,
9393 /// Floating point value of the given size in bytes.
9494 Float ( u8 ) ,
95- /// String literal of the given total length in bytes.
96- String ( u64 ) ,
95+ /// String literal of the given total length in bytes, with the character width in bytes
96+ /// (1 for C/UTF-8, 2 for UTF-16, 4 for UTF-32).
97+ String { len : u64 , char_width : u8 } ,
9798}
9899
99100/// The per-operand number formats IDA recorded for an instruction.
@@ -468,14 +469,25 @@ impl IDBFileParser {
468469 ByteDataType :: Float => Some ( DataKind :: Float ( 4 ) ) ,
469470 ByteDataType :: Double => Some ( DataKind :: Float ( 8 ) ) ,
470471 ByteDataType :: Tbyte => Some ( DataKind :: Float ( 10 ) ) ,
471- ByteDataType :: Strlit => Some ( DataKind :: String ( size as u64 ) ) ,
472+ ByteDataType :: Strlit => {
473+ // Recover the character width so wide (UTF-16/32) strings are not mistyped as
474+ // single-byte. Defaults to one byte when no explicit string type is recorded.
475+ let char_width = AddressInfo :: new ( id0, id1, id2, netdelta, address)
476+ . and_then ( |info| info. str_type ( ) )
477+ . map ( |str_type| str_char_width ( str_type. width ) )
478+ . unwrap_or ( 1 ) ;
479+ Some ( DataKind :: String {
480+ len : size as u64 ,
481+ char_width,
482+ } )
483+ }
472484 // Structs carry their actual type in the TIL; resolve it below. Alignment fill
473485 // and vector/custom kinds have no simple mapping and are skipped.
474486 _ => None ,
475487 } ;
476488
477489 // A struct item only makes sense with its real type; look it up. Avoid the per-item
478- // type lookup for the (vastly more common) scalar/string items.
490+ // type lookup for the (vastly more common) scalar items.
479491 let ty = if matches ! ( data. data_type( ) , ByteDataType :: Struct ) {
480492 AddressInfo :: new ( id0, id1, id2, netdelta, address)
481493 . and_then ( |info| info. tinfo ( & root_info) . ok ( ) . flatten ( ) )
@@ -816,6 +828,16 @@ impl IDBFileParser {
816828 }
817829}
818830
831+ /// The byte width of an IDA string character width.
832+ fn str_char_width ( width : idb_rs:: addr_info:: StrWidth ) -> u8 {
833+ use idb_rs:: addr_info:: StrWidth ;
834+ match width {
835+ StrWidth :: Byte => 1 ,
836+ StrWidth :: Word => 2 ,
837+ StrWidth :: Dword => 4 ,
838+ }
839+ }
840+
819841/// Map an IDA operand representation to the subset of number formats we can apply directly.
820842///
821843/// Enum, segment, stack-variable, struct-offset, forced and custom representations need extra
0 commit comments