Skip to content

Commit fe0ad48

Browse files
committed
perf: optimize parsing of imports and exports
Pre-allocate vector capacities in `parse_imports` and `parse_exports` to reduce reallocations and improve performance. Introduce an `ImportDll` enum to clarify and simplify logic for mapping import ordinals to names, avoiding repeated string lowercasing.
1 parent 103d6f0 commit fe0ad48

1 file changed

Lines changed: 80 additions & 53 deletions

File tree

lib/src/modules/pe/parser.rs

Lines changed: 80 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::collections::{HashMap, VecDeque};
44
use std::default::Default;
55
use std::iter::zip;
66
use std::mem;
7-
use std::str::{from_utf8, FromStr};
7+
use std::str::{FromStr, from_utf8};
88
use std::sync::OnceLock;
99

1010
use bstr::{BStr, ByteSlice};
@@ -14,11 +14,11 @@ use memchr::memmem;
1414
use nom::branch::{alt, permutation};
1515
use nom::bytes::complete::{take, take_till, take_while_m_n};
1616
use nom::combinator::{
17-
cond, consumed, iterator, map, opt, success, verify, Success,
17+
Success, cond, consumed, iterator, map, opt, success, verify,
1818
};
1919
use nom::error::ErrorKind;
2020
use nom::multi::{
21-
count, fold_many0, fold_many1, length_data, many0, many1, many_m_n,
21+
count, fold_many0, fold_many1, length_data, many_m_n, many0, many1,
2222
};
2323
use nom::number::complete::{le_u16, le_u32, le_u64, u8};
2424
use nom::{Err, IResult, Parser, ToUsize};
@@ -905,12 +905,11 @@ impl<'a> PE<'a> {
905905
.ok()
906906
.and_then(|name| name.strip_prefix('/'))
907907
.and_then(|offset| u32::from_str(offset).ok())
908-
&& let Some(s) = string_table.get(offset as usize..)
909-
&& let Ok((_, s)) =
910-
take_till::<_, &[u8], Error>(|c| c == 0)(s)
911-
{
912-
section.full_name = Some(BStr::new(s));
913-
}
908+
&& let Some(s) = string_table.get(offset as usize..)
909+
&& let Ok((_, s)) = take_till::<_, &[u8], Error>(|c| c == 0)(s)
910+
{
911+
section.full_name = Some(BStr::new(s));
912+
}
914913

915914
Ok((remainder, section))
916915
}
@@ -1473,23 +1472,23 @@ impl<'a> PE<'a> {
14731472
// value as the upper bound and avoid some completely
14741473
// corrupt entries with random values.
14751474
&& (rsrc_entry.size as usize) < 0x3FFFFFFF
1476-
{
1477-
resources.push(Resource {
1478-
type_id: ids.0,
1479-
rsrc_id: ids.1,
1480-
lang_id: ids.2,
1481-
// `rsrc_entry.offset` is relative to the start of
1482-
// the resource section, so it's actually an RVA.
1483-
// Here we convert it to a file offset.
1484-
offset: self.rva_to_offset(rsrc_entry.offset),
1485-
rva: rsrc_entry.offset,
1486-
length: rsrc_entry.size,
1487-
});
1488-
1489-
if resources.len() == Self::MAX_PE_RESOURCES {
1490-
return Some((resources_info, resources));
1491-
}
1475+
{
1476+
resources.push(Resource {
1477+
type_id: ids.0,
1478+
rsrc_id: ids.1,
1479+
lang_id: ids.2,
1480+
// `rsrc_entry.offset` is relative to the start of
1481+
// the resource section, so it's actually an RVA.
1482+
// Here we convert it to a file offset.
1483+
offset: self.rva_to_offset(rsrc_entry.offset),
1484+
rva: rsrc_entry.offset,
1485+
length: rsrc_entry.size,
1486+
});
1487+
1488+
if resources.len() == Self::MAX_PE_RESOURCES {
1489+
return Some((resources_info, resources));
14921490
}
1491+
}
14931492
}
14941493
}
14951494
}
@@ -1525,8 +1524,10 @@ impl<'a> PE<'a> {
15251524
/// Returns a parser that parses a WIN_CERTIFICATE structure.
15261525
fn win_cert_parser(
15271526
&self,
1528-
) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], Vec<AuthenticodeSignature<'a>>>
1529-
+ '_ {
1527+
) -> impl FnMut(
1528+
&'a [u8],
1529+
) -> IResult<&'a [u8], Vec<AuthenticodeSignature<'a>>>
1530+
+ '_ {
15301531
move |input: &'a [u8]| {
15311532
// Parse the WIN_CERTIFICATE structure.
15321533
let (remainder, (length, _revision, _cert_type)) = (
@@ -1556,8 +1557,10 @@ impl<'a> PE<'a> {
15561557
/// Authenticode signature.
15571558
fn signature_parser(
15581559
&self,
1559-
) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], Vec<AuthenticodeSignature<'a>>>
1560-
+ '_ {
1560+
) -> impl FnMut(
1561+
&'a [u8],
1562+
) -> IResult<&'a [u8], Vec<AuthenticodeSignature<'a>>>
1563+
+ '_ {
15611564
move |input: &'a [u8]| {
15621565
let signatures = AuthenticodeParser::parse(input, self)
15631566
.map_err(|_| Err::Error(Error::new(input, ErrorKind::Fail)))?;
@@ -1672,7 +1675,7 @@ impl<'a> PE<'a> {
16721675
.parse(cv_info)
16731676
{
16741677
Ok((_, (_signature, _padding, pdb_path))) => {
1675-
return Some(pdb_path)
1678+
return Some(pdb_path);
16761679
}
16771680
Err(_) => continue,
16781681
};
@@ -1793,7 +1796,9 @@ impl<'a> PE<'a> {
17931796
let is_32_bits =
17941797
self.optional_hdr.magic != Self::IMAGE_NT_OPTIONAL_HDR64_MAGIC;
17951798

1796-
let mut imported_funcs = Vec::new();
1799+
let estimated_descriptors =
1800+
min(input.len() / Self::SIZE_OF_DIR_ENTRY, Self::MAX_PE_IMPORTS);
1801+
let mut imported_funcs = Vec::with_capacity(estimated_descriptors);
17971802

17981803
// Parse import descriptors until finding one that is empty (filled
17991804
// with null values), which indicates the end of the directory table;
@@ -1840,6 +1845,16 @@ impl<'a> PE<'a> {
18401845
continue;
18411846
};
18421847

1848+
let import_dll = if dll_name.eq_ignore_ascii_case("ws2_32.dll")
1849+
|| dll_name.eq_ignore_ascii_case("wsock32.dll")
1850+
{
1851+
ImportDll::Wsock32
1852+
} else if dll_name.eq_ignore_ascii_case("oleaut32.dll") {
1853+
ImportDll::Oleaut32
1854+
} else {
1855+
ImportDll::Other
1856+
};
1857+
18431858
// Use the INT (a.k.a: OriginalFirstThunk) if it is non-zero, but
18441859
// fall back to using the IAT (a.k.a: FirstThunk).
18451860
let thunks = if descriptor.import_name_table > 0 {
@@ -1849,20 +1864,25 @@ impl<'a> PE<'a> {
18491864
}
18501865
.or_else(|| self.data_at_rva(descriptor.import_address_table));
18511866

1852-
let thunks = match thunks {
1867+
let thunks_slice = match thunks {
18531868
Some(thunk) => thunk,
18541869
None => continue,
18551870
};
18561871

1872+
let estimated_funcs = min(
1873+
thunks_slice.len() / if is_32_bits { 4 } else { 8 },
1874+
Self::MAX_PE_IMPORTS,
1875+
);
1876+
18571877
// Parse the thunks, which are an array of 64-bit or 32-bit
18581878
// values, depending on whether this is a 64-bit PE file. The
18591879
// array is terminated by a null thunk.
18601880
let thunks = iterator(
1861-
thunks,
1881+
thunks_slice,
18621882
verify(uint(is_32_bits), |thunk| *thunk != 0),
18631883
);
18641884

1865-
let mut funcs = Vec::new();
1885+
let mut funcs = Vec::with_capacity(estimated_funcs);
18661886

18671887
for (i, mut thunk) in
18681888
&mut thunks.take(Self::MAX_PE_IMPORTS).enumerate()
@@ -1897,7 +1917,7 @@ impl<'a> PE<'a> {
18971917
if import_by_ordinal {
18981918
let ordinal = (thunk & 0xffff) as u16;
18991919
func.ordinal = Some(ordinal);
1900-
func.name = ord_to_name(dll_name, ordinal);
1920+
func.name = ord_to_name(import_dll, ordinal);
19011921
} else {
19021922
// When descriptor values are virtual addresses, thunks are
19031923
// virtual addresses too and need to be converted to RVAs.
@@ -2109,17 +2129,16 @@ impl<'a> PE<'a> {
21092129

21102130
// Create a vector with one item per exported function. Items in the
21112131
// array initially have function RVA and ordinal only.
2112-
let mut exported_funcs: Vec<_> = func_rvas
2113-
.take(num_exports)
2114-
.enumerate()
2115-
.filter_map(|(i, rva)| {
2116-
Some(ExportedFunc {
2132+
let mut exported_funcs = Vec::with_capacity(num_exports);
2133+
for (i, rva) in func_rvas.take(num_exports).enumerate() {
2134+
if let Some(ordinal) = exports.base.checked_add(i as u32) {
2135+
exported_funcs.push(ExportedFunc {
21172136
rva,
2118-
ordinal: exports.base.checked_add(i as u32)?,
2137+
ordinal,
21192138
..Default::default()
2120-
})
2121-
})
2122-
.collect();
2139+
});
2140+
}
2141+
}
21232142

21242143
let names = self
21252144
.parse_at_rva(exports.address_of_names, count(le_u32, num_names))
@@ -2139,10 +2158,11 @@ impl<'a> PE<'a> {
21392158
.find_position(|ordinal| {
21402159
*ordinal as u32 == f.ordinal - exports.base
21412160
})
2142-
&& let Some(name_rva) = names.get(idx) {
2143-
f.name =
2144-
self.str_at_rva(*name_rva, Self::MAX_FUNC_NAME_LENGTH);
2145-
}
2161+
&& let Some(name_rva) = names.get(idx)
2162+
{
2163+
f.name =
2164+
self.str_at_rva(*name_rva, Self::MAX_FUNC_NAME_LENGTH);
2165+
}
21462166

21472167
// If the function's RVA is within the exports section (as given
21482168
// by the RVA and size fields in the directory entry), this is a
@@ -2822,16 +2842,23 @@ fn utf16_le_string() -> impl FnMut(&[u8]) -> IResult<&[u8], String> {
28222842
}
28232843
}
28242844

2845+
/// Classification of the DLL named by an import descriptor.
///
/// The import parser computes this once per descriptor (comparing the DLL
/// name case-insensitively) and passes it to `ord_to_name`, which uses it to
/// select the table that maps import ordinals to function names.
#[derive(Copy, Clone, PartialEq, Eq)]
2846+
enum ImportDll {
2847+
/// The DLL is `ws2_32.dll` or `wsock32.dll`; both share one ordinal table.
Wsock32,
2848+
/// The DLL is `oleaut32.dll`.
Oleaut32,
2849+
/// Any other DLL. No ordinal table applies, so names imported by ordinal
/// always take the generic "ordN" form.
Other,
2850+
}
2851+
28252852
/// Convert ordinal number to function name.
28262853
///
28272854
/// For some well-known DLLs the returned name is the one that corresponds
28282855
/// to the given ordinal. For the remaining DLLs the returned name has the form
28292856
/// "ordN" where N is the ordinal (e.g: "ord1", "ord23").
2830-
fn ord_to_name(dll_name: &str, ordinal: u16) -> Option<String> {
2831-
let func_name = match dll_name.to_ascii_lowercase().as_str() {
2832-
"ws2_32.dll" | "wsock32.dll" => wsock32_ord_to_name(ordinal),
2833-
"oleaut32.dll" => oleaut32_ord_to_name(ordinal),
2834-
_ => None,
2857+
fn ord_to_name(well_known: ImportDll, ordinal: u16) -> Option<String> {
2858+
let func_name = match well_known {
2859+
ImportDll::Wsock32 => wsock32_ord_to_name(ordinal),
2860+
ImportDll::Oleaut32 => oleaut32_ord_to_name(ordinal),
2861+
ImportDll::Other => None,
28352862
};
28362863

28372864
func_name.map(|n| n.to_owned()).or_else(|| Some(format!("ord{ordinal}")))

0 commit comments

Comments
 (0)