Skip to content

Commit 0d4795b

Browse files
committed
refactor: enforce absolute table lookups in dex module.
This commit refactors the DEX module parser to ensure strict compliance with the DEX specification and to resolve potential parsing failures on obfuscated or padded files.
* Refactored the table parser signatures (`parse_strings`, `parse_types`, `parse_protos`, `parse_fields`, `parse_methods`, `parse_class_defs`) to eliminate the intermediate `xxx_offset` remainder slices. Each function now receives the complete data buffer and the `DexHeader`, and returns its `Vec` directly.
* Implemented absolute table offset lookups (`data.get(header.table_off..)`) in all table parsers, replacing sequential `remainder` parsing. This prevents misalignment and out-of-bounds errors on DEX files that contain section padding or rearranged tables.
1 parent 38fdf8b commit 0d4795b

1 file changed

Lines changed: 91 additions & 60 deletions

File tree

lib/src/modules/dex/parser.rs

Lines changed: 91 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -55,36 +55,31 @@ impl Dex {
5555

5656
pub fn parse<'a>(data: &'a [u8]) -> Result<Self, Err<Error<'a>>> {
5757
// Extract dex header with information about data location
58-
let (strings_offset, header) = Self::parse_dex_header(data)?;
58+
let (_, header) = Self::parse_dex_header(data)?;
5959

6060
// Extract defined strings
61-
let (types_offset, strings) =
62-
Self::parse_strings(strings_offset, data, &header)?;
61+
let strings = Self::parse_strings(data, &header);
6362

6463
// Extract defined types
65-
let (proto_offset, types) =
66-
Self::parse_types(types_offset, &header, &strings)?;
64+
let types = Self::parse_types(data, &header, &strings);
6765

6866
// Extract defined prototypes
69-
let (field_offset, protos) =
70-
Self::parse_protos(proto_offset, data, &header, &strings, &types)?;
67+
let protos = Self::parse_protos(data, &header, &strings, &types);
7168

7269
// Extract defined fields
73-
let (method_offset, fields) =
74-
Self::parse_fields(field_offset, &header, &strings, &types)?;
70+
let fields = Self::parse_fields(data, &header, &strings, &types);
7571

7672
// Extract defined methods
77-
let (class_offset, methods) = Self::parse_methods(
78-
method_offset,
73+
let methods = Self::parse_methods(
74+
data,
7975
&header,
8076
&strings,
8177
&types,
8278
&protos,
83-
)?;
79+
);
8480

8581
// Extract defined classes
86-
let (_, class_defs) =
87-
Self::parse_class_defs(class_offset, &header, &strings, &types)?;
82+
let class_defs = Self::parse_class_defs(data, &header, &strings, &types);
8883

8984
// Extract map information
9085
let map_list = Self::parse_map_items(data, &header);
@@ -201,18 +196,22 @@ impl Dex {
201196
/// A Vec is used so that an item can be accessed quickly by its index.
202197
///
203198
/// See: https://source.android.com/docs/core/runtime/dex-format#string-item
204-
fn parse_strings<'a>(
205-
remainder: &'a [u8],
206-
data: &'a [u8],
199+
fn parse_strings(
200+
data: &[u8],
207201
header: &DexHeader,
208-
) -> IResult<&'a [u8], Vec<Rc<String>>> {
202+
) -> Vec<Rc<String>> {
209203
// DEX file doesn't contain strings.
210204
// It's a strange case, but it needs to be checked.
211205
if header.string_ids_off == 0 {
212-
return Ok((remainder, Vec::new()));
206+
return Vec::new();
213207
}
214208

215-
let mut it = iterator(remainder, le_u32);
209+
let table_slice = match data.get(header.string_ids_off as usize..) {
210+
Some(slice) => slice,
211+
None => return Vec::new(),
212+
};
213+
214+
let mut it = iterator(table_slice, le_u32::<&[u8], Error>);
216215

217216
let string_offsets = it
218217
.by_ref()
@@ -222,9 +221,9 @@ impl Dex {
222221
.map(Rc::new)
223222
.collect();
224223

225-
let (rem, _) = it.finish()?;
224+
let _ = it.finish();
226225

227-
Ok((rem, string_offsets))
226+
string_offsets
228227
}
229228

230229
/// Parses string by index in the string_ids_off table
@@ -266,18 +265,23 @@ impl Dex {
266265
/// `type_item = string_item[type_ids_off[idx]]`
267266
///
268267
/// See: https://source.android.com/docs/core/runtime/dex-format#type-id-item
269-
fn parse_types<'a>(
270-
remainder: &'a [u8],
268+
fn parse_types(
269+
data: &[u8],
271270
header: &DexHeader,
272271
string_items: &[Rc<String>],
273-
) -> IResult<&'a [u8], Vec<Rc<String>>> {
272+
) -> Vec<Rc<String>> {
274273
// DEX file doesn't contain types.
275274
// It's a strange case, but it needs to be checked.
276275
if header.type_ids_off == 0 {
277-
return Ok((remainder, Vec::new()));
276+
return Vec::new();
278277
}
279278

280-
let mut it = iterator(remainder, le_u32);
279+
let table_slice = match data.get(header.type_ids_off as usize..) {
280+
Some(slice) => slice,
281+
None => return Vec::new(),
282+
};
283+
284+
let mut it = iterator(table_slice, le_u32::<&[u8], Error>);
281285

282286
let type_indexes = it
283287
.by_ref()
@@ -286,29 +290,33 @@ impl Dex {
286290
.filter_map(|idx| string_items.get(idx as usize).cloned())
287291
.collect();
288292

289-
let (rem, _) = it.finish()?;
293+
let _ = it.finish();
290294

291-
Ok((rem, type_indexes))
295+
type_indexes
292296
}
293297

294298
/// Collects a list of prototypes in a hashmap from proto_ids_off list.
295299
///
296300
/// See: https://source.android.com/docs/core/runtime/dex-format#proto-id-item
297301
/// See: https://source.android.com/docs/core/runtime/dex-format#type-list
298-
fn parse_protos<'a>(
299-
remainder: &'a [u8],
300-
data: &'a [u8],
302+
fn parse_protos(
303+
data: &[u8],
301304
header: &DexHeader,
302305
string_items: &[Rc<String>],
303306
type_items: &[Rc<String>],
304-
) -> IResult<&'a [u8], Vec<Rc<ProtoItem>>> {
307+
) -> Vec<Rc<ProtoItem>> {
305308
// DEX file doesn't contain prototypes.
306309
// It's a strange case, but it needs to be checked.
307310
if header.proto_ids_off == 0 {
308-
return Ok((remainder, Vec::new()));
311+
return Vec::new();
309312
}
310313

311-
let mut it = iterator(remainder, (le_u32, le_u32, le_u32));
314+
let table_slice = match data.get(header.proto_ids_off as usize..) {
315+
Some(slice) => slice,
316+
None => return Vec::new(),
317+
};
318+
319+
let mut it = iterator(table_slice, (le_u32::<&[u8], Error>, le_u32, le_u32));
312320

313321
let proto_entries = it
314322
.by_ref()
@@ -337,9 +345,9 @@ impl Dex {
337345
})
338346
.collect();
339347

340-
let (rem, _) = it.finish()?;
348+
let _ = it.finish();
341349

342-
Ok((rem, proto_entries))
350+
proto_entries
343351
}
344352

345353
/// Collects a type list to list of strings from given offset
@@ -374,19 +382,24 @@ impl Dex {
374382
/// Collects the list of fields from the field_ids_off table.
375383
///
376384
/// See: https://source.android.com/docs/core/runtime/dex-format#field-id-item
377-
fn parse_fields<'a>(
378-
remainder: &'a [u8],
385+
fn parse_fields(
386+
data: &[u8],
379387
header: &DexHeader,
380388
string_items: &[Rc<String>],
381389
type_items: &[Rc<String>],
382-
) -> IResult<&'a [u8], Vec<FieldItem>> {
390+
) -> Vec<FieldItem> {
383391
// DEX file doesn't contain fields.
384392
// It's a strange case, but it needs to be checked.
385393
if header.field_ids_off == 0 {
386-
return Ok((remainder, Vec::new()));
394+
return Vec::new();
387395
}
388396

389-
let mut it = iterator(remainder, (le_u16, le_u16, le_u32));
397+
let table_slice = match data.get(header.field_ids_off as usize..) {
398+
Some(slice) => slice,
399+
None => return Vec::new(),
400+
};
401+
402+
let mut it = iterator(table_slice, (le_u16::<&[u8], Error>, le_u16, le_u32));
390403

391404
let field_entries = it
392405
.by_ref()
@@ -401,28 +414,33 @@ impl Dex {
401414
})
402415
.collect();
403416

404-
let (rem, _) = it.finish()?;
417+
let _ = it.finish();
405418

406-
Ok((rem, field_entries))
419+
field_entries
407420
}
408421

409422
/// Collects the list of methods from the method_ids_off table.
410423
///
411424
/// See: https://source.android.com/docs/core/runtime/dex-format#method-id-item
412-
fn parse_methods<'a>(
413-
remainder: &'a [u8],
425+
fn parse_methods(
426+
data: &[u8],
414427
header: &DexHeader,
415428
string_items: &[Rc<String>],
416429
type_items: &[Rc<String>],
417430
proto_items: &[Rc<ProtoItem>],
418-
) -> IResult<&'a [u8], Vec<MethodItem>> {
431+
) -> Vec<MethodItem> {
419432
// DEX file doesn't contain methods
420433
// It's a strange case, but it needs to be checked.
421434
if header.method_ids_off == 0 {
422-
return Ok((remainder, Vec::new()));
435+
return Vec::new();
423436
}
424437

425-
let mut it = iterator(remainder, (le_u16, le_u16, le_u32));
438+
let table_slice = match data.get(header.method_ids_off as usize..) {
439+
Some(slice) => slice,
440+
None => return Vec::new(),
441+
};
442+
443+
let mut it = iterator(table_slice, (le_u16::<&[u8], Error>, le_u16, le_u32));
426444

427445
let method_entries = it
428446
.by_ref()
@@ -437,32 +455,45 @@ impl Dex {
437455
})
438456
.collect();
439457

440-
let (rem, _) = it.finish()?;
458+
let _ = it.finish();
441459

442-
Ok((rem, method_entries))
460+
method_entries
443461
}
444462

445463
/// Collects a list of classes from class_defs_off list.
446464
/// Only a part of the fields is extracted, because not all of them are
447465
/// useful when writing YARA rules.
448466
///
449467
/// See: https://source.android.com/docs/core/runtime/dex-format#class-def-item
450-
fn parse_class_defs<'a>(
451-
remainder: &'a [u8],
468+
fn parse_class_defs(
469+
data: &[u8],
452470
header: &DexHeader,
453471
string_items: &[Rc<String>],
454472
type_items: &[Rc<String>],
455-
) -> IResult<&'a [u8], Vec<ClassItem>> {
473+
) -> Vec<ClassItem> {
456474
// DEX file doesn't contain classes
457475
// It's a strange case, but it needs to be checked.
458476
if header.class_defs_off == 0 {
459-
return Ok((remainder, Vec::new()));
477+
return Vec::new();
460478
}
461479

462-
// (class_idx, access_flags, superclass_idx, interfaces_off, source_file_idx, annotations_off, class_data_off, static_values_off)
480+
let table_slice = match data.get(header.class_defs_off as usize..) {
481+
Some(slice) => slice,
482+
None => return Vec::new(),
483+
};
484+
463485
let mut it = iterator(
464-
remainder,
465-
(le_u32::<&[u8], Error>, le_u32, le_u32, le_u32, le_u32, le_u32, le_u32, le_u32),
486+
table_slice,
487+
(
488+
le_u32::<&[u8], Error>, // class_idx
489+
le_u32, // access_flags
490+
le_u32, // superclass_idx
491+
le_u32, // interfaces_off
492+
le_u32, // source_file_idx
493+
le_u32, // annotations_off
494+
le_u32, // class_data_off
495+
le_u32, // static_values_off
496+
),
466497
);
467498

468499
let class_entries = it
@@ -502,9 +533,9 @@ impl Dex {
502533
)
503534
.collect();
504535

505-
let (rem, _) = it.finish()?;
536+
let _ = it.finish();
506537

507-
Ok((rem, class_entries))
538+
class_entries
508539
}
509540

510541
/// Collects information about maps from the DEX file

0 commit comments

Comments
 (0)