diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 7cdf03b7e..583fdccd5 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -359,4 +359,8 @@ zip = { workspace = true } [[bench]] name = "dex" +harness = false + +[[bench]] +name = "pe" harness = false \ No newline at end of file diff --git a/lib/benches/dex.rs b/lib/benches/dex.rs index 9e9727993..0d72bf475 100644 --- a/lib/benches/dex.rs +++ b/lib/benches/dex.rs @@ -1,4 +1,4 @@ -use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, black_box, criterion_group, criterion_main}; mod commons; @@ -13,7 +13,9 @@ fn bench_dex(c: &mut Criterion) { group.bench_function("parse", |b| { b.iter(|| { - let _ = black_box(yara_x::mods::invoke::(black_box(&data))); + let _ = black_box(yara_x::mods::invoke::( + black_box(&data), + )); }); }); diff --git a/lib/benches/pe.rs b/lib/benches/pe.rs new file mode 100644 index 000000000..ca55617ad --- /dev/null +++ b/lib/benches/pe.rs @@ -0,0 +1,26 @@ +use criterion::{Criterion, black_box, criterion_group, criterion_main}; + +mod commons; + +use commons::create_binary_from_zipped_ihex; + +fn bench_pe(c: &mut Criterion) { + let data = create_binary_from_zipped_ihex( + "src/modules/pe/tests/testdata/c704cca0fe4c9bdee18a302952540073b860e3b4d42e081f86d27bdb1cf6ede4.in.zip", + ); + + let mut group = c.benchmark_group("pe"); + + group.bench_function("parse", |b| { + b.iter(|| { + let _ = black_box(yara_x::mods::invoke::( + black_box(&data), + )); + }); + }); + + group.finish(); +} + +criterion_group!(benches, bench_pe); +criterion_main!(benches); diff --git a/lib/src/modules/dex/mod.rs b/lib/src/modules/dex/mod.rs index 667dac050..08c135e18 100644 --- a/lib/src/modules/dex/mod.rs +++ b/lib/src/modules/dex/mod.rs @@ -27,7 +27,7 @@ fn main(data: &[u8], _meta: Option<&[u8]>) -> Result { SIGNATURE_CACHE.with(|cache| *cache.borrow_mut() = None); match parser::Dex::parse(data) { - Ok(dex) => Ok(dex.into()), + Ok(dex) => Ok(dex), Err(_) => { let mut dex = Dex::new(); dex.set_is_dex(false); diff --git a/lib/src/modules/dex/parser.rs b/lib/src/modules/dex/parser.rs index 52b9d806a..7a6520385 100644 --- a/lib/src/modules/dex/parser.rs +++ b/lib/src/modules/dex/parser.rs @@ -1,6 +1,3 @@ -use std::borrow::Cow; -use std::rc::Rc; - use nom::bytes::complete::take; use nom::combinator::iterator; use nom::combinator::{cond, map, map_res, verify}; @@ -14,34 +11,9 @@ use crate::modules::utils::leb128::uleb128; type Error<'a> = nom::error::Error<&'a [u8]>; -#[derive(Default)] -pub struct Dex<'a> { - // DEX header information - header: DexHeader, - - // List with all found strings - strings: Vec>, - - // List with all found types - types: Vec>, - - // List with all found prototypes - protos: Vec>>, - - // List with all found fields - fields: Vec>, - - // List with all found methods - methods: Vec>, - - // List with all found classes - class_defs: Vec>, +pub struct Dex; - // Map information - map_list: Option, -} - -impl<'a> Dex<'a> { +impl Dex { const ENDIAN_CONSTANT: u32 = 0x12345678; const REVERSE_ENDIAN_CONSTANT: u32 = 0x78563412; const DEX_HEADER_SIZE: u32 = 0x70; @@ -54,7 +26,7 @@ impl<'a> Dex<'a> { const MAX_METHODS: usize = 1_000_000; const MAX_FIELDS: usize = 1_000_000; - pub fn parse(data: &'a [u8]) -> Result>> { + pub fn parse(data: &[u8]) -> Result>> { // Extract dex header with information about data location let (_, header) = Self::parse_dex_header(data)?; @@ -71,30 +43,32 @@ impl<'a> Dex<'a> { let fields = Self::parse_fields(data, &header, &strings, &types); // Extract defined methods - let methods = Self::parse_methods( - data, - &header, - &strings, - &types, - &protos, - ); + let methods = + Self::parse_methods(data, &header, &strings, &types, &protos); // Extract defined classes - let class_defs = Self::parse_class_defs(data, &header, &strings, &types); + let class_defs = + Self::parse_class_defs(data, &header, &strings, &types); // Extract map information let map_list = Self::parse_map_items(data, &header); - Ok(Self { - header, - strings, - types, - protos, - fields, - methods, - class_defs, - map_list, - }) + let mut dex = protos::dex::Dex::new(); + dex.set_is_dex(true); + + dex.header = MessageField::some(header.into()); + dex.strings = strings; + dex.types = types; + dex.protos = protos; + dex.fields = fields; + dex.methods = methods; + dex.class_defs = class_defs; + + if let Some(map_list) = map_list { + dex.map_list = MessageField::some(map_list); + } + + Ok(dex) } fn parse_dex_header(data: &[u8]) -> IResult<&[u8], DexHeader> { @@ -197,10 +171,7 @@ impl<'a> Dex<'a> { /// A HashMap is needed to quickly access an item by its index. /// /// See: https://source.android.com/docs/core/runtime/dex-format#string-item - fn parse_strings( - data: &'a [u8], - header: &DexHeader, - ) -> Vec> { + fn parse_strings(data: &[u8], header: &DexHeader) -> Vec { // DEX file doesn't contain strings. // It's a strange case, but it needs to be checked. if header.string_ids_off == 0 { @@ -212,18 +183,11 @@ impl<'a> Dex<'a> { None => return Vec::new(), }; - let mut it = iterator(table_slice, le_u32::<&[u8], Error>); - - let string_offsets = it - .by_ref() + iterator(table_slice, le_u32::<&[u8], Error>) .take(Self::MAX_STRINGS) .take(header.string_ids_size as usize) .filter_map(|offset| Self::parse_string_from_offset(data, offset)) - .collect(); - - let _ = it.finish(); - - string_offsets + .collect() } /// Parses string by index in the string_ids_off table @@ -234,9 +198,9 @@ impl<'a> Dex<'a> { /// Strings larger than 64KB will be considered invalid and the result will /// be None. fn parse_string_from_offset( - data: &'a [u8], + data: &[u8], string_data_offset: u32, - ) -> Option> { + ) -> Option { if string_data_offset < Self::DEX_HEADER_SIZE { return None; } @@ -249,10 +213,11 @@ impl<'a> Dex<'a> { } let (_, bytes) = - take::(utf16_size as usize)(slice).ok()?; + take::(utf16_size as usize)(slice) + .ok()?; - // Decode MUTF-8 string and return Cow<'a, str> - simd_cesu8::mutf8::decode(bytes).ok() + // Decode MUTF-8 string and return String + simd_cesu8::mutf8::decode(bytes).ok().map(|s| s.into_owned()) }) } @@ -263,10 +228,10 @@ impl<'a> Dex<'a> { /// /// See: https://source.android.com/docs/core/runtime/dex-format#type-id-item fn parse_types( - data: &'a [u8], + data: &[u8], header: &DexHeader, - string_items: &[Cow<'a, str>], - ) -> Vec> { + string_items: &[String], + ) -> Vec { // DEX file doesn't contain types. // It's a strange case, but it needs to be checked. if header.type_ids_off == 0 { @@ -278,18 +243,11 @@ impl<'a> Dex<'a> { None => return Vec::new(), }; - let mut it = iterator(table_slice, le_u32::<&[u8], Error>); - - let type_indexes = it - .by_ref() + iterator(table_slice, le_u32::<&[u8], Error>) .take(Self::MAX_TYPES) .take(header.type_ids_size as usize) .filter_map(|idx| string_items.get(idx as usize).cloned()) - .collect(); - - let _ = it.finish(); - - type_indexes + .collect() } /// Collects a list of prototypes in a hashmap from proto_ids_off list. @@ -297,11 +255,11 @@ impl<'a> Dex<'a> { /// See: https://source.android.com/docs/core/runtime/dex-format#proto-id-item /// See: https://source.android.com/docs/core/runtime/dex-format#type-list fn parse_protos( - data: &'a [u8], + data: &[u8], header: &DexHeader, - string_items: &[Cow<'a, str>], - type_items: &[Cow<'a, str>], - ) -> Vec>> { + string_items: &[String], + type_items: &[String], + ) -> Vec { // DEX file doesn't contain prototypes. // It's a strange case, but it needs to be checked. if header.proto_ids_off == 0 { @@ -313,10 +271,7 @@ impl<'a> Dex<'a> { None => return Vec::new(), }; - let mut it = iterator(table_slice, (le_u32::<&[u8], Error>, le_u32, le_u32)); - - let proto_entries = it - .by_ref() + iterator(table_slice, (le_u32::<&[u8], Error>, le_u32, le_u32)) .take(Self::MAX_PROTOS) .take(header.proto_ids_size as usize) .filter_map(|(shorty_idx, return_type_idx, parameters_off)| { @@ -333,28 +288,24 @@ impl<'a> Dex<'a> { .unwrap_or_default() }; - Some(Rc::new(ProtoItem { - shorty, - return_type, - parameters_count: parameters.len() as u32, - parameters, - })) + let mut item = protos::dex::ProtoItem::new(); + item.shorty = Some(shorty); + item.return_type = Some(return_type); + item.set_parameters_count(parameters.len() as u32); + item.parameters.extend(parameters); + Some(item) }) - .collect(); - - let _ = it.finish(); - - proto_entries + .collect() } /// Collects a type list to list of strings from given offset /// /// See: https://source.android.com/docs/core/runtime/dex-format#type-list fn parse_type_list( - data: &'a [u8], - type_items: &[Cow<'a, str>], + data: &[u8], + type_items: &[String], offset: u32, - ) -> Option>> { + ) -> Option> { let remainder = data.get(offset as usize..)?; let (remainder, size) = le_u32::<&[u8], Error>(remainder).ok()?; @@ -364,27 +315,23 @@ impl<'a> Dex<'a> { return None; } - let mut it = iterator(remainder, le_u16::<&[u8], Error>); - let items = it - .by_ref() - .take(size as usize) - .filter_map(|idx| type_items.get(idx as usize).cloned()) - .collect(); - - let _ = it.finish(); - - Some(items) + Some( + iterator(remainder, le_u16::<&[u8], Error>) + .take(size as usize) + .filter_map(|idx| type_items.get(idx as usize).cloned()) + .collect(), + ) } /// Collects a list of fields in a hashmap from field_ids_off list. /// /// See: https://source.android.com/docs/core/runtime/dex-format#field-id-item fn parse_fields( - data: &'a [u8], + data: &[u8], header: &DexHeader, - string_items: &[Cow<'a, str>], - type_items: &[Cow<'a, str>], - ) -> Vec> { + string_items: &[String], + type_items: &[String], + ) -> Vec { // DEX file doesn't contain fields. // It's a strange case, but it needs to be checked. if header.field_ids_off == 0 { @@ -396,36 +343,32 @@ impl<'a> Dex<'a> { None => return Vec::new(), }; - let mut it = iterator(table_slice, (le_u16::<&[u8], Error>, le_u16, le_u32)); - - let field_entries = it - .by_ref() + iterator(table_slice, (le_u16::<&[u8], Error>, le_u16, le_u32)) .take(Self::MAX_FIELDS) .take(header.field_ids_size as usize) .filter_map(|(class_idx, type_idx, name_idx)| { let class = type_items.get(class_idx as usize)?.clone(); let type_ = type_items.get(type_idx as usize)?.clone(); let name = string_items.get(name_idx as usize)?.clone(); - - Some(FieldItem { class, type_, name }) + let mut item = protos::dex::FieldItem::new(); + item.class = Some(class); + item.type_ = Some(type_); + item.name = Some(name); + Some(item) }) - .collect(); - - let _ = it.finish(); - - field_entries + .collect() } /// Collects a list of methods in a hashmap from method_ids_off list. /// /// See: https://source.android.com/docs/core/runtime/dex-format#method-id-item fn parse_methods( - data: &'a [u8], + data: &[u8], header: &DexHeader, - string_items: &[Cow<'a, str>], - type_items: &[Cow<'a, str>], - proto_items: &[Rc>], - ) -> Vec> { + string_items: &[String], + type_items: &[String], + proto_items: &[protos::dex::ProtoItem], + ) -> Vec { // DEX file doesn't contain methods // It's a strange case, but it needs to be checked. if header.method_ids_off == 0 { @@ -437,10 +380,7 @@ impl<'a> Dex<'a> { None => return Vec::new(), }; - let mut it = iterator(table_slice, (le_u16::<&[u8], Error>, le_u16, le_u32)); - - let method_entries = it - .by_ref() + iterator(table_slice, (le_u16::<&[u8], Error>, le_u16, le_u32)) .take(Self::MAX_METHODS) .take(header.method_ids_size as usize) .filter_map(|(class_idx, proto_idx, name_idx)| { @@ -448,13 +388,13 @@ impl<'a> Dex<'a> { let proto = proto_items.get(proto_idx as usize)?.clone(); let name = string_items.get(name_idx as usize)?.clone(); - Some(MethodItem { class, proto, name }) + let mut item = protos::dex::MethodItem::new(); + item.class = Some(class); + item.proto = MessageField::some(proto); + item.name = Some(name); + Some(item) }) - .collect(); - - let _ = it.finish(); - - method_entries + .collect() } /// Collects a list of classes from class_defs_off list. @@ -463,11 +403,11 @@ impl<'a> Dex<'a> { /// /// See: https://source.android.com/docs/core/runtime/dex-format#class-def-item fn parse_class_defs( - data: &'a [u8], + data: &[u8], header: &DexHeader, - string_items: &[Cow<'a, str>], - type_items: &[Cow<'a, str>], - ) -> Vec> { + string_items: &[String], + type_items: &[String], + ) -> Vec { // DEX file doesn't contain classess // It's a strange case, but it needs to be checked. if header.class_defs_off == 0 { @@ -479,7 +419,7 @@ impl<'a> Dex<'a> { None => return Vec::new(), }; - let mut it = iterator( + let it = iterator( table_slice, ( le_u32::<&[u8], Error>, // class_idx @@ -493,9 +433,7 @@ impl<'a> Dex<'a> { ), ); - let class_entries = it - .by_ref() - .take(Self::MAX_CLASSES) + it.take(Self::MAX_CLASSES) .take(header.class_defs_size as usize) .filter_map( |( @@ -520,32 +458,41 @@ impl<'a> Dex<'a> { None }; - Some(ClassItem { - class, - access_flags, - superclass, - source_file, - }) + let mut item = protos::dex::ClassItem::new(); + item.class = Some(class); + item.set_access_flags(access_flags); + if let Some(superclass) = superclass { + item.superclass = Some(superclass); + } + if let Some(source_file) = source_file { + item.source_file = Some(source_file); + } + Some(item) }, ) - .collect(); - - let _ = it.finish(); - - class_entries + .collect() } /// Collects information about maps from the DEX file /// /// See: https://source.android.com/docs/core/runtime/dex-format#map-list - fn parse_map_items(data: &[u8], header: &DexHeader) -> Option { + fn parse_map_items( + data: &[u8], + header: &DexHeader, + ) -> Option { data.get(header.map_off as usize..).and_then(|offset| { let (items_offset, size) = le_u32::<&[u8], Error>(offset).ok()?; - let mut it = iterator(items_offset, Self::parse_map_item); - let items = it.by_ref().take(size as usize).collect(); - let _ = it.finish(); + let items: Vec = + iterator(items_offset, Self::parse_map_item) + .take(size as usize) + .collect(); + + let mut map_list = protos::dex::MapList::new(); - Some(MapList { size, items }) + map_list.set_size(size); + map_list.items = items; + + Some(map_list) }) } @@ -553,7 +500,7 @@ impl<'a> Dex<'a> { /// /// See: https://source.android.com/docs/core/runtime/dex-format#map-item #[inline] - fn parse_map_item(input: &[u8]) -> IResult<&[u8], MapItem> { + fn parse_map_item(input: &[u8]) -> IResult<&[u8], protos::dex::MapItem> { let (remainder, (item_type, unused, size, offset)) = ( le_u16, // type le_u16, // unused @@ -562,7 +509,13 @@ impl<'a> Dex<'a> { ) .parse(input)?; - Ok((remainder, MapItem { item_type, unused, size, offset })) + let mut item = protos::dex::MapItem::new(); + item.type_ = Some(EnumOrUnknown::from_i32(item_type.into())); + item.set_unused(unused.into()); + item.set_size(size); + item.set_offset(offset); + + Ok((remainder, item)) } } @@ -639,86 +592,6 @@ struct DexHeader { header_offset: Option, } -#[derive(Debug)] -pub struct ProtoItem<'a> { - shorty: Cow<'a, str>, - return_type: Cow<'a, str>, - parameters_count: u32, - parameters: Vec>, -} - -#[derive(Debug)] -pub struct FieldItem<'a> { - class: Cow<'a, str>, - type_: Cow<'a, str>, - name: Cow<'a, str>, -} - -#[derive(Debug)] -pub struct MethodItem<'a> { - class: Cow<'a, str>, - proto: Rc>, - name: Cow<'a, str>, -} - -#[derive(Debug)] -pub struct ClassItem<'a> { - class: Cow<'a, str>, - access_flags: u32, - superclass: Option>, - source_file: Option>, -} - -#[derive(Default)] -pub struct MapList { - size: u32, - items: Vec, -} - -#[derive(Default)] -pub struct MapItem { - item_type: u16, - unused: u16, - size: u32, - offset: u32, -} - -impl<'a> From> for protos::dex::Dex { - fn from(dex: Dex<'a>) -> Self { - let mut result = protos::dex::Dex::new(); - - result.set_is_dex(true); - result.header = MessageField::some(dex.header.clone().into()); - - result - .strings - .extend(dex.strings.into_iter().map(|x| x.to_string())); - result - .types - .extend(dex.types.into_iter().map(|x| x.to_string())); - result.protos.extend( - dex.protos - .iter() - .map(|x| protos::dex::ProtoItem::from(x.as_ref())), - ); - result - .fields - .extend(dex.fields.iter().map(protos::dex::FieldItem::from)); - result - .methods - .extend(dex.methods.iter().map(protos::dex::MethodItem::from)); - result - .class_defs - .extend(dex.class_defs.iter().map(protos::dex::ClassItem::from)); - - if let Some(map_list) = dex.map_list { - result.map_list = MessageField::some(map_list.into()); - } - - result - } -} - impl From for protos::dex::DexHeader { fn from(header: DexHeader) -> Self { let mut result = protos::dex::DexHeader::new(); @@ -737,86 +610,3 @@ impl From for protos::dex::DexHeader { result } } - -impl<'a> From<&ProtoItem<'a>> for protos::dex::ProtoItem { - fn from(value: &ProtoItem<'a>) -> Self { - let mut result = protos::dex::ProtoItem::new(); - - result.shorty = Some(value.shorty.to_string()); - result.return_type = Some(value.return_type.to_string()); - result.set_parameters_count(value.parameters_count); - result - .parameters - .extend(value.parameters.iter().map(|x| x.to_string())); - - result - } -} - -impl<'a> From<&FieldItem<'a>> for protos::dex::FieldItem { - fn from(value: &FieldItem<'a>) -> Self { - let mut result = protos::dex::FieldItem::new(); - - result.class = Some(value.class.to_string()); - result.type_ = Some(value.type_.to_string()); - result.name = Some(value.name.to_string()); - - result - } -} - -impl<'a> From<&MethodItem<'a>> for protos::dex::MethodItem { - fn from(value: &MethodItem<'a>) -> Self { - let mut result = protos::dex::MethodItem::new(); - - result.class = Some(value.class.to_string()); - result.proto = MessageField::some(value.proto.as_ref().into()); - result.name = Some(value.name.to_string()); - - result - } -} - -impl<'a> From<&ClassItem<'a>> for protos::dex::ClassItem { - fn from(value: &ClassItem<'a>) -> Self { - let mut result = protos::dex::ClassItem::new(); - - result.class = Some(value.class.to_string()); - result.set_access_flags(value.access_flags); - - if let Some(superclass) = &value.superclass { - result.superclass = Some(superclass.to_string()); - } - - if let Some(source_file) = &value.source_file { - result.source_file = Some(source_file.to_string()); - } - - result - } -} - -impl From for protos::dex::MapList { - fn from(value: MapList) -> Self { - let mut result = protos::dex::MapList::new(); - - result.set_size(value.size); - result.items = - value.items.iter().map(protos::dex::MapItem::from).collect(); - - result - } -} - -impl From<&MapItem> for protos::dex::MapItem { - fn from(item: &MapItem) -> Self { - let mut result = protos::dex::MapItem::new(); - - result.type_ = Some(EnumOrUnknown::from_i32(item.item_type.into())); - result.set_unused(item.unused.into()); - result.set_size(item.size); - result.set_offset(item.offset); - - result - } -} diff --git a/lib/src/modules/pe/parser.rs b/lib/src/modules/pe/parser.rs index e0fb6f051..5f6604ab5 100644 --- a/lib/src/modules/pe/parser.rs +++ b/lib/src/modules/pe/parser.rs @@ -4,7 +4,7 @@ use std::collections::{HashMap, VecDeque}; use std::default::Default; use std::iter::zip; use std::mem; -use std::str::{from_utf8, FromStr}; +use std::str::{FromStr, from_utf8}; use std::sync::OnceLock; use bstr::{BStr, ByteSlice}; @@ -14,11 +14,11 @@ use memchr::memmem; use nom::branch::{alt, permutation}; use nom::bytes::complete::{take, take_till, take_while_m_n}; use nom::combinator::{ - cond, consumed, iterator, map, opt, success, verify, Success, + Success, cond, consumed, iterator, map, opt, success, verify, }; use nom::error::ErrorKind; use nom::multi::{ - count, fold_many0, fold_many1, length_data, many0, many1, many_m_n, + count, fold_many0, fold_many1, length_data, many_m_n, many0, many1, }; use nom::number::complete::{le_u16, le_u32, le_u64, u8}; use nom::{Err, IResult, Parser, ToUsize}; @@ -905,12 +905,11 @@ impl<'a> PE<'a> { .ok() .and_then(|name| name.strip_prefix('/')) .and_then(|offset| u32::from_str(offset).ok()) - && let Some(s) = string_table.get(offset as usize..) - && let Ok((_, s)) = - take_till::<_, &[u8], Error>(|c| c == 0)(s) - { - section.full_name = Some(BStr::new(s)); - } + && let Some(s) = string_table.get(offset as usize..) + && let Ok((_, s)) = take_till::<_, &[u8], Error>(|c| c == 0)(s) + { + section.full_name = Some(BStr::new(s)); + } Ok((remainder, section)) } @@ -1473,23 +1472,23 @@ impl<'a> PE<'a> { // value as the upper bound and avoid some completely // corrupt entries with random values. && (rsrc_entry.size as usize) < 0x3FFFFFFF - { - resources.push(Resource { - type_id: ids.0, - rsrc_id: ids.1, - lang_id: ids.2, - // `rsrc_entry.offset` is relative to the start of - // the resource section, so it's actually an RVA. - // Here we convert it to a file offset. - offset: self.rva_to_offset(rsrc_entry.offset), - rva: rsrc_entry.offset, - length: rsrc_entry.size, - }); - - if resources.len() == Self::MAX_PE_RESOURCES { - return Some((resources_info, resources)); - } + { + resources.push(Resource { + type_id: ids.0, + rsrc_id: ids.1, + lang_id: ids.2, + // `rsrc_entry.offset` is relative to the start of + // the resource section, so it's actually an RVA. + // Here we convert it to a file offset. + offset: self.rva_to_offset(rsrc_entry.offset), + rva: rsrc_entry.offset, + length: rsrc_entry.size, + }); + + if resources.len() == Self::MAX_PE_RESOURCES { + return Some((resources_info, resources)); } + } } } } @@ -1525,8 +1524,10 @@ impl<'a> PE<'a> { /// Returns a parser that parses a WIN_CERTIFICATE structure. fn win_cert_parser( &self, - ) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], Vec>> - + '_ { + ) -> impl FnMut( + &'a [u8], + ) -> IResult<&'a [u8], Vec>> + + '_ { move |input: &'a [u8]| { // Parse the WIN_CERTIFICATE structure. let (remainder, (length, _revision, _cert_type)) = ( @@ -1556,8 +1557,10 @@ impl<'a> PE<'a> { /// Authenticode signature. fn signature_parser( &self, - ) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], Vec>> - + '_ { + ) -> impl FnMut( + &'a [u8], + ) -> IResult<&'a [u8], Vec>> + + '_ { move |input: &'a [u8]| { let signatures = AuthenticodeParser::parse(input, self) .map_err(|_| Err::Error(Error::new(input, ErrorKind::Fail)))?; @@ -1672,7 +1675,7 @@ impl<'a> PE<'a> { .parse(cv_info) { Ok((_, (_signature, _padding, pdb_path))) => { - return Some(pdb_path) + return Some(pdb_path); } Err(_) => continue, }; @@ -1793,7 +1796,9 @@ impl<'a> PE<'a> { let is_32_bits = self.optional_hdr.magic != Self::IMAGE_NT_OPTIONAL_HDR64_MAGIC; - let mut imported_funcs = Vec::new(); + let estimated_descriptors = + min(input.len() / Self::SIZE_OF_DIR_ENTRY, Self::MAX_PE_IMPORTS); + let mut imported_funcs = Vec::with_capacity(estimated_descriptors); // Parse import descriptors until finding one that is empty (filled // with null values), which indicates the end of the directory table; @@ -1840,6 +1845,16 @@ impl<'a> PE<'a> { continue; }; + let import_dll = if dll_name.eq_ignore_ascii_case("ws2_32.dll") + || dll_name.eq_ignore_ascii_case("wsock32.dll") + { + ImportDll::Wsock32 + } else if dll_name.eq_ignore_ascii_case("oleaut32.dll") { + ImportDll::Oleaut32 + } else { + ImportDll::Other + }; + // Use the INT (a.k.a: OriginalFirstThunk) if it is non-zero, but // fallback to using the IAT (a.k.a: FirstThunk). let thunks = if descriptor.import_name_table > 0 { @@ -1849,20 +1864,25 @@ impl<'a> PE<'a> { } .or_else(|| self.data_at_rva(descriptor.import_address_table)); - let thunks = match thunks { + let thunks_slice = match thunks { Some(thunk) => thunk, None => continue, }; + let estimated_funcs = min( + thunks_slice.len() / if is_32_bits { 4 } else { 8 }, + Self::MAX_PE_IMPORTS, + ); + // Parse the thunks, which are an array of 64-bits or 32-bits // values, depending on whether this is 64-bits PE file. The // array is terminated by a null thunk. let thunks = iterator( - thunks, + thunks_slice, verify(uint(is_32_bits), |thunk| *thunk != 0), ); - let mut funcs = Vec::new(); + let mut funcs = Vec::with_capacity(estimated_funcs); for (i, mut thunk) in &mut thunks.take(Self::MAX_PE_IMPORTS).enumerate() @@ -1897,7 +1917,7 @@ impl<'a> PE<'a> { if import_by_ordinal { let ordinal = (thunk & 0xffff) as u16; func.ordinal = Some(ordinal); - func.name = ord_to_name(dll_name, ordinal); + func.name = ord_to_name(import_dll, ordinal); } else { // When descriptor values are virtual addresses, thunks are // virtual addresses too and need to be converted to RVAs. @@ -2109,17 +2129,16 @@ impl<'a> PE<'a> { // Create a vector with one item per exported function. Items in the // array initially have function RVA and ordinal only. - let mut exported_funcs: Vec<_> = func_rvas - .take(num_exports) - .enumerate() - .filter_map(|(i, rva)| { - Some(ExportedFunc { + let mut exported_funcs = Vec::with_capacity(num_exports); + for (i, rva) in func_rvas.take(num_exports).enumerate() { + if let Some(ordinal) = exports.base.checked_add(i as u32) { + exported_funcs.push(ExportedFunc { rva, - ordinal: exports.base.checked_add(i as u32)?, + ordinal, ..Default::default() - }) - }) - .collect(); + }); + } + } let names = self .parse_at_rva(exports.address_of_names, count(le_u32, num_names)) @@ -2139,10 +2158,11 @@ impl<'a> PE<'a> { .find_position(|ordinal| { *ordinal as u32 == f.ordinal - exports.base }) - && let Some(name_rva) = names.get(idx) { - f.name = - self.str_at_rva(*name_rva, Self::MAX_FUNC_NAME_LENGTH); - } + && let Some(name_rva) = names.get(idx) + { + f.name = + self.str_at_rva(*name_rva, Self::MAX_FUNC_NAME_LENGTH); + } // If the function's RVA is within the exports section (as given // by the RVA and size fields in the directory entry), this is a @@ -2822,16 +2842,23 @@ fn utf16_le_string() -> impl FnMut(&[u8]) -> IResult<&[u8], String> { } } +#[derive(Copy, Clone, PartialEq, Eq)] +enum ImportDll { + Wsock32, + Oleaut32, + Other, +} + /// Convert ordinal number to function name. /// /// For some well-known DLLs the returned name is the one that that corresponds /// to the given ordinal. For the remaining DLLs the returned name has the form /// "ordN" where N is the ordinal (e.g: "ord1", "ord23"). -fn ord_to_name(dll_name: &str, ordinal: u16) -> Option { - let func_name = match dll_name.to_ascii_lowercase().as_str() { - "ws2_32.dll" | "wsock32.dll" => wsock32_ord_to_name(ordinal), - "oleaut32.dll" => oleaut32_ord_to_name(ordinal), - _ => None, +fn ord_to_name(well_known: ImportDll, ordinal: u16) -> Option { + let func_name = match well_known { + ImportDll::Wsock32 => wsock32_ord_to_name(ordinal), + ImportDll::Oleaut32 => oleaut32_ord_to_name(ordinal), + ImportDll::Other => None, }; func_name.map(|n| n.to_owned()).or_else(|| Some(format!("ord{ordinal}")))