Skip to content

Commit d992139

Browse files
committed
refactor: optimize DEX string parsing using Cow
Update `Dex` and related structures to leverage `std::borrow::Cow` instead of `Rc` for string items. This change improves memory efficiency and reduces allocations during DEX file parsing. By using `Cow`, string data can be borrowed directly from the input buffer when possible, avoiding unnecessary heap allocations and `Rc` overhead for strings that don't require ownership.
1 parent 0d4795b commit d992139

1 file changed

Lines changed: 73 additions & 74 deletions

File tree

lib/src/modules/dex/parser.rs

Lines changed: 73 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::borrow::Cow;
12
use std::rc::Rc;
23

34
use nom::bytes::complete::take;
@@ -14,33 +15,33 @@ use crate::modules::utils::leb128::uleb128;
1415
type Error<'a> = nom::error::Error<&'a [u8]>;
1516

1617
#[derive(Default)]
17-
pub struct Dex {
18+
pub struct Dex<'a> {
1819
// DEX header information
1920
header: DexHeader,
2021

2122
// List with all found strings
22-
strings: Vec<Rc<String>>,
23+
strings: Vec<Cow<'a, str>>,
2324

2425
// List with all found types
25-
types: Vec<Rc<String>>,
26+
types: Vec<Cow<'a, str>>,
2627

2728
// List with all found prototypes
28-
protos: Vec<Rc<ProtoItem>>,
29+
protos: Vec<Rc<ProtoItem<'a>>>,
2930

3031
// List with all found fields
31-
fields: Vec<FieldItem>,
32+
fields: Vec<FieldItem<'a>>,
3233

3334
// List with all found methods
34-
methods: Vec<MethodItem>,
35+
methods: Vec<MethodItem<'a>>,
3536

3637
// List with all found classes
37-
class_defs: Vec<ClassItem>,
38+
class_defs: Vec<ClassItem<'a>>,
3839

3940
// Map information
4041
map_list: Option<MapList>,
4142
}
4243

43-
impl Dex {
44+
impl<'a> Dex<'a> {
4445
const ENDIAN_CONSTANT: u32 = 0x12345678;
4546
const REVERSE_ENDIAN_CONSTANT: u32 = 0x78563412;
4647
const DEX_HEADER_SIZE: u32 = 0x70;
@@ -53,7 +54,7 @@ impl Dex {
5354
const MAX_METHODS: usize = 1_000_000;
5455
const MAX_FIELDS: usize = 1_000_000;
5556

56-
pub fn parse<'a>(data: &'a [u8]) -> Result<Self, Err<Error<'a>>> {
57+
pub fn parse(data: &'a [u8]) -> Result<Self, Err<Error<'a>>> {
5758
// Extract dex header with information about data location
5859
let (_, header) = Self::parse_dex_header(data)?;
5960

@@ -197,9 +198,9 @@ impl Dex {
197198
///
198199
/// See: https://source.android.com/docs/core/runtime/dex-format#string-item
199200
fn parse_strings(
200-
data: &[u8],
201+
data: &'a [u8],
201202
header: &DexHeader,
202-
) -> Vec<Rc<String>> {
203+
) -> Vec<Cow<'a, str>> {
203204
// DEX file doesn't contain strings.
204205
// It's a strange case, but it needs to be checked.
205206
if header.string_ids_off == 0 {
@@ -218,7 +219,6 @@ impl Dex {
218219
.take(Self::MAX_STRINGS)
219220
.take(header.string_ids_size as usize)
220221
.filter_map(|offset| Self::parse_string_from_offset(data, offset))
221-
.map(Rc::new)
222222
.collect();
223223

224224
let _ = it.finish();
@@ -234,28 +234,25 @@ impl Dex {
234234
/// Strings larger than 64KB will be considered invalid and the result will
235235
/// be None.
236236
fn parse_string_from_offset(
237-
data: &[u8],
237+
data: &'a [u8],
238238
string_data_offset: u32,
239-
) -> Option<String> {
239+
) -> Option<Cow<'a, str>> {
240240
if string_data_offset < Self::DEX_HEADER_SIZE {
241241
return None;
242242
}
243243

244-
data.get(string_data_offset as usize..).and_then(|data| {
245-
let (data, utf16_size) = uleb128(data).ok()?;
244+
data.get(string_data_offset as usize..).and_then(|slice| {
245+
let (slice, utf16_size) = uleb128(slice).ok()?;
246246

247247
if utf16_size > 65536 || (utf16_size as usize) > data.len() {
248248
return None;
249249
}
250250

251251
let (_, bytes) =
252-
take::<usize, &[u8], Error>(utf16_size as usize)(data).ok()?;
252+
take::<usize, &[u8], Error>(utf16_size as usize)(slice).ok()?;
253253

254-
// Decode MUTF-8 string and save it
255-
match simd_cesu8::mutf8::decode(bytes) {
256-
Ok(v) => Some(v.to_string()),
257-
Err(_) => None,
258-
}
254+
// Decode MUTF-8 string and return Cow<'a, str>
255+
simd_cesu8::mutf8::decode(bytes).ok()
259256
})
260257
}
261258

@@ -266,10 +263,10 @@ impl Dex {
266263
///
267264
/// See: https://source.android.com/docs/core/runtime/dex-format#type-id-item
268265
fn parse_types(
269-
data: &[u8],
266+
data: &'a [u8],
270267
header: &DexHeader,
271-
string_items: &[Rc<String>],
272-
) -> Vec<Rc<String>> {
268+
string_items: &[Cow<'a, str>],
269+
) -> Vec<Cow<'a, str>> {
273270
// DEX file doesn't contain types.
274271
// It's a strange case, but it needs to be checked.
275272
if header.type_ids_off == 0 {
@@ -300,11 +297,11 @@ impl Dex {
300297
/// See: https://source.android.com/docs/core/runtime/dex-format#proto-id-item
301298
/// See: https://source.android.com/docs/core/runtime/dex-format#type-list
302299
fn parse_protos(
303-
data: &[u8],
300+
data: &'a [u8],
304301
header: &DexHeader,
305-
string_items: &[Rc<String>],
306-
type_items: &[Rc<String>],
307-
) -> Vec<Rc<ProtoItem>> {
302+
string_items: &[Cow<'a, str>],
303+
type_items: &[Cow<'a, str>],
304+
) -> Vec<Rc<ProtoItem<'a>>> {
308305
// DEX file doesn't contain prototypes.
309306
// It's a strange case, but it needs to be checked.
310307
if header.proto_ids_off == 0 {
@@ -354,10 +351,10 @@ impl Dex {
354351
///
355352
/// See: https://source.android.com/docs/core/runtime/dex-format#type-list
356353
fn parse_type_list(
357-
data: &[u8],
358-
type_items: &[Rc<String>],
354+
data: &'a [u8],
355+
type_items: &[Cow<'a, str>],
359356
offset: u32,
360-
) -> Option<Vec<Rc<String>>> {
357+
) -> Option<Vec<Cow<'a, str>>> {
361358
let remainder = data.get(offset as usize..)?;
362359
let (remainder, size) = le_u32::<&[u8], Error>(remainder).ok()?;
363360

@@ -383,11 +380,11 @@ impl Dex {
383380
///
384381
/// See: https://source.android.com/docs/core/runtime/dex-format#field-id-item
385382
fn parse_fields(
386-
data: &[u8],
383+
data: &'a [u8],
387384
header: &DexHeader,
388-
string_items: &[Rc<String>],
389-
type_items: &[Rc<String>],
390-
) -> Vec<FieldItem> {
385+
string_items: &[Cow<'a, str>],
386+
type_items: &[Cow<'a, str>],
387+
) -> Vec<FieldItem<'a>> {
391388
// DEX file doesn't contain fields.
392389
// It's a strange case, but it needs to be checked.
393390
if header.field_ids_off == 0 {
@@ -423,12 +420,12 @@ impl Dex {
423420
///
424421
/// See: https://source.android.com/docs/core/runtime/dex-format#method-id-item
425422
fn parse_methods(
426-
data: &[u8],
423+
data: &'a [u8],
427424
header: &DexHeader,
428-
string_items: &[Rc<String>],
429-
type_items: &[Rc<String>],
430-
proto_items: &[Rc<ProtoItem>],
431-
) -> Vec<MethodItem> {
425+
string_items: &[Cow<'a, str>],
426+
type_items: &[Cow<'a, str>],
427+
proto_items: &[Rc<ProtoItem<'a>>],
428+
) -> Vec<MethodItem<'a>> {
432429
// DEX file doesn't contain methods
433430
// It's a strange case, but it needs to be checked.
434431
if header.method_ids_off == 0 {
@@ -466,11 +463,11 @@ impl Dex {
466463
///
467464
/// See: https://source.android.com/docs/core/runtime/dex-format#class-def-item
468465
fn parse_class_defs(
469-
data: &[u8],
466+
data: &'a [u8],
470467
header: &DexHeader,
471-
string_items: &[Rc<String>],
472-
type_items: &[Rc<String>],
473-
) -> Vec<ClassItem> {
468+
string_items: &[Cow<'a, str>],
469+
type_items: &[Cow<'a, str>],
470+
) -> Vec<ClassItem<'a>> {
474471
// DEX file doesn't contain classes
475472
// It's a strange case, but it needs to be checked.
476473
if header.class_defs_off == 0 {
@@ -643,33 +640,33 @@ struct DexHeader {
643640
}
644641

645642
#[derive(Debug)]
646-
pub struct ProtoItem {
647-
shorty: Rc<String>,
648-
return_type: Rc<String>,
643+
pub struct ProtoItem<'a> {
644+
shorty: Cow<'a, str>,
645+
return_type: Cow<'a, str>,
649646
parameters_count: u32,
650-
parameters: Vec<Rc<String>>,
647+
parameters: Vec<Cow<'a, str>>,
651648
}
652649

653650
#[derive(Debug)]
654-
pub struct FieldItem {
655-
class: Rc<String>,
656-
type_: Rc<String>,
657-
name: Rc<String>,
651+
pub struct FieldItem<'a> {
652+
class: Cow<'a, str>,
653+
type_: Cow<'a, str>,
654+
name: Cow<'a, str>,
658655
}
659656

660657
#[derive(Debug)]
661-
pub struct MethodItem {
662-
class: Rc<String>,
663-
proto: Rc<ProtoItem>,
664-
name: Rc<String>,
658+
pub struct MethodItem<'a> {
659+
class: Cow<'a, str>,
660+
proto: Rc<ProtoItem<'a>>,
661+
name: Cow<'a, str>,
665662
}
666663

667664
#[derive(Debug)]
668-
pub struct ClassItem {
669-
class: Rc<String>,
665+
pub struct ClassItem<'a> {
666+
class: Cow<'a, str>,
670667
access_flags: u32,
671-
superclass: Option<Rc<String>>,
672-
source_file: Option<Rc<String>>,
668+
superclass: Option<Cow<'a, str>>,
669+
source_file: Option<Cow<'a, str>>,
673670
}
674671

675672
#[derive(Default)]
@@ -686,17 +683,19 @@ pub struct MapItem {
686683
offset: u32,
687684
}
688685

689-
impl From<Dex> for protos::dex::Dex {
690-
fn from(dex: Dex) -> Self {
686+
impl<'a> From<Dex<'a>> for protos::dex::Dex {
687+
fn from(dex: Dex<'a>) -> Self {
691688
let mut result = protos::dex::Dex::new();
692689

693690
result.set_is_dex(true);
694691
result.header = MessageField::some(dex.header.clone().into());
695692

696693
result
697694
.strings
698-
.extend(dex.strings.into_iter().map(|x| x.as_ref().clone()));
699-
result.types.extend(dex.types.into_iter().map(|x| x.as_ref().clone()));
695+
.extend(dex.strings.into_iter().map(|x| x.to_string()));
696+
result
697+
.types
698+
.extend(dex.types.into_iter().map(|x| x.to_string()));
700699
result.protos.extend(
701700
dex.protos
702701
.iter()
@@ -739,23 +738,23 @@ impl From<DexHeader> for protos::dex::DexHeader {
739738
}
740739
}
741740

742-
impl From<&ProtoItem> for protos::dex::ProtoItem {
743-
fn from(value: &ProtoItem) -> Self {
741+
impl<'a> From<&ProtoItem<'a>> for protos::dex::ProtoItem {
742+
fn from(value: &ProtoItem<'a>) -> Self {
744743
let mut result = protos::dex::ProtoItem::new();
745744

746745
result.shorty = Some(value.shorty.to_string());
747746
result.return_type = Some(value.return_type.to_string());
748747
result.set_parameters_count(value.parameters_count);
749748
result
750749
.parameters
751-
.extend(value.parameters.iter().map(|x| x.as_ref().into()));
750+
.extend(value.parameters.iter().map(|x| x.to_string()));
752751

753752
result
754753
}
755754
}
756755

757-
impl From<&FieldItem> for protos::dex::FieldItem {
758-
fn from(value: &FieldItem) -> Self {
756+
impl<'a> From<&FieldItem<'a>> for protos::dex::FieldItem {
757+
fn from(value: &FieldItem<'a>) -> Self {
759758
let mut result = protos::dex::FieldItem::new();
760759

761760
result.class = Some(value.class.to_string());
@@ -766,8 +765,8 @@ impl From<&FieldItem> for protos::dex::FieldItem {
766765
}
767766
}
768767

769-
impl From<&MethodItem> for protos::dex::MethodItem {
770-
fn from(value: &MethodItem) -> Self {
768+
impl<'a> From<&MethodItem<'a>> for protos::dex::MethodItem {
769+
fn from(value: &MethodItem<'a>) -> Self {
771770
let mut result = protos::dex::MethodItem::new();
772771

773772
result.class = Some(value.class.to_string());
@@ -778,8 +777,8 @@ impl From<&MethodItem> for protos::dex::MethodItem {
778777
}
779778
}
780779

781-
impl From<&ClassItem> for protos::dex::ClassItem {
782-
fn from(value: &ClassItem) -> Self {
780+
impl<'a> From<&ClassItem<'a>> for protos::dex::ClassItem {
781+
fn from(value: &ClassItem<'a>) -> Self {
783782
let mut result = protos::dex::ClassItem::new();
784783

785784
result.class = Some(value.class.to_string());

0 commit comments

Comments
 (0)