From 5e89e5baf726998a207345e66f673d60a987de8a Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Fri, 15 May 2026 12:04:11 +0200 Subject: [PATCH 1/3] fix: clear caches in `dex` module when starting a new scan. --- lib/src/modules/dex/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/src/modules/dex/mod.rs b/lib/src/modules/dex/mod.rs index 803d2dbf7..79e454fb7 100644 --- a/lib/src/modules/dex/mod.rs +++ b/lib/src/modules/dex/mod.rs @@ -23,6 +23,8 @@ thread_local!( #[module_main] fn main(data: &[u8], _meta: Option<&[u8]>) -> Result { + CHECKSUM_CACHE.with(|cache| *cache.borrow_mut() = None); + SIGNATURE_CACHE.with(|cache| *cache.borrow_mut() = None); match parser::Dex::parse(data) { Ok(dex) => Ok(dex.into()), Err(_) => { From 21495fcd928ca80163f3189919548cfd96493c65 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Fri, 15 May 2026 12:16:43 +0200 Subject: [PATCH 2/3] chore: write a benchmark for measuring performance of `dex` module. --- Cargo.lock | 148 +++++++++++++++++++++++++++++++++++-- lib/Cargo.toml | 7 +- lib/benches/commons/mod.rs | 41 ++++++++++ lib/benches/dex.rs | 24 ++++++ 4 files changed, 212 insertions(+), 8 deletions(-) create mode 100644 lib/benches/commons/mod.rs create mode 100644 lib/benches/dex.rs diff --git a/Cargo.lock b/Cargo.lock index 1f0e51e65..96750195c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -41,6 +41,12 @@ dependencies = [ "libc", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "annotate-snippets" version = "0.12.15" @@ -445,6 +451,33 @@ dependencies = [ "windows-link", ] +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] 
+name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "clap" version = "4.6.0" @@ -730,6 +763,42 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools 0.10.5", +] + [[package]] name = "crossbeam" version = "0.8.4" @@ -813,6 +882,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "crypto-bigint" version = "0.5.5" @@ -1428,6 +1503,17 @@ dependencies = [ "subtle", ] +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + [[package]] name = "hashbrown" version = "0.14.5" @@ -1725,6 +1811,15 @@ 
version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.14.0" @@ -2249,6 +2344,34 @@ dependencies = [ "spki", ] +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "portable-atomic" version = "1.13.1" @@ -3243,6 +3366,16 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tokio" version = "1.48.0" @@ -3832,7 +3965,7 @@ dependencies = [ "cranelift-frontend", "cranelift-native", "gimli 0.33.0", - "itertools", + "itertools 0.14.0", "log", "object", "pulley-interpreter", @@ -4263,6 +4396,7 @@ dependencies = [ "bstr", "const-oid", "crc32fast", + "criterion", "daachorse", "der-parser", "digest", @@ -4277,7 +4411,7 @@ dependencies = [ 
"intaglio", "inventory", "ipnet", - "itertools", + "itertools 0.14.0", "js-sys", "log", "magic", @@ -4355,7 +4489,7 @@ dependencies = [ "globwalk", "home", "indicatif", - "itertools", + "itertools 0.14.0", "log", "predicates", "protobuf", @@ -4415,7 +4549,7 @@ dependencies = [ "dashmap", "futures", "goldenfile", - "itertools", + "itertools 0.14.0", "js-sys", "regex", "serde", @@ -4456,7 +4590,7 @@ dependencies = [ "globwalk", "goldenfile", "indexmap", - "itertools", + "itertools 0.14.0", "log", "logos", "num-traits", @@ -4482,7 +4616,7 @@ dependencies = [ "base64", "globwalk", "goldenfile", - "itertools", + "itertools 0.14.0", "protobuf", "protobuf-codegen", "yansi", @@ -4496,7 +4630,7 @@ dependencies = [ "chrono", "globwalk", "goldenfile", - "itertools", + "itertools 0.14.0", "protobuf", "protobuf-codegen", "yansi", diff --git a/lib/Cargo.toml b/lib/Cargo.toml index d8f6c270e..7cdf03b7e 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -348,10 +348,15 @@ protobuf-parse = { workspace = true, optional = true } yara-x-proto = { workspace = true, optional = true } [dev-dependencies] +criterion = "0.5" globwalk = { workspace = true } goldenfile = { workspace = true } ihex = { workspace = true } pretty_assertions = { workspace = true } rayon = { workspace = true } yara-x-proto-yaml = { workspace = true } -zip = { workspace = true } \ No newline at end of file +zip = { workspace = true } + +[[bench]] +name = "dex" +harness = false \ No newline at end of file diff --git a/lib/benches/commons/mod.rs b/lib/benches/commons/mod.rs new file mode 100644 index 000000000..e6d8f6e33 --- /dev/null +++ b/lib/benches/commons/mod.rs @@ -0,0 +1,41 @@ +use std::fs::File; +use std::io::Read; +use std::path::Path; + +pub fn create_binary_from_ihex(ihex: &str) -> Vec { + let mut reader = ihex::Reader::new(ihex); + let mut data = Vec::new(); + while let Some(Ok(record)) = reader.next() { + if let ihex::Record::Data { value, .. 
} = record { + data.extend(value); + } + } + data +} + +pub fn create_binary_from_zipped_ihex>(path: P) -> Vec { + let path = path.as_ref(); + let f = File::open(path) + .unwrap_or_else(|_| panic!("can not open file: {:?}", &path)); + + let mut zip = zip::ZipArchive::new(f) + .unwrap_or_else(|_| panic!("can not unzip file: {:?}", &path)); + + let path_without_zip_ext = path.with_extension(""); + let inner_file_name = + path_without_zip_ext.file_name().unwrap().to_str().unwrap(); + + let mut inner_file = zip.by_name(inner_file_name).unwrap_or_else(|_| { + panic!( + "ZIP archive {:?} doesn't contain file: {:?}", + &path, &inner_file_name + ) + }); + + let mut ihex = String::new(); + inner_file.read_to_string(&mut ihex).unwrap_or_else(|_| { + panic!("can not read ihex content from : {:?}", &path) + }); + + create_binary_from_ihex(ihex.as_str()) +} diff --git a/lib/benches/dex.rs b/lib/benches/dex.rs new file mode 100644 index 000000000..9e9727993 --- /dev/null +++ b/lib/benches/dex.rs @@ -0,0 +1,24 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +mod commons; + +use commons::create_binary_from_zipped_ihex; + +fn bench_dex(c: &mut Criterion) { + let data = create_binary_from_zipped_ihex( + "src/modules/dex/tests/testdata/c14c75d58399825287e0ee0fcfede6ec06f93489fb52f70bca2736fae5fceab2.in.zip", + ); + + let mut group = c.benchmark_group("dex"); + + group.bench_function("parse", |b| { + b.iter(|| { + let _ = black_box(yara_x::mods::invoke::(black_box(&data))); + }); + }); + + group.finish(); +} + +criterion_group!(benches, bench_dex); +criterion_main!(benches); From efb5cada8b971be9d6405bcf73cf107bbd930368 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Fri, 15 May 2026 12:27:52 +0200 Subject: [PATCH 3/3] fix: issue in the `contains_method` function in the `dex` module. This function assumed that the `methods` vector is sorted by name and used a binary search, but the vector is not actually sorted.
--- lib/src/modules/dex/mod.rs | 3 ++- lib/src/modules/dex/tests/mod.rs | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/src/modules/dex/mod.rs b/lib/src/modules/dex/mod.rs index 79e454fb7..667dac050 100644 --- a/lib/src/modules/dex/mod.rs +++ b/lib/src/modules/dex/mod.rs @@ -25,6 +25,7 @@ thread_local!( fn main(data: &[u8], _meta: Option<&[u8]>) -> Result { CHECKSUM_CACHE.with(|cache| *cache.borrow_mut() = None); SIGNATURE_CACHE.with(|cache| *cache.borrow_mut() = None); + match parser::Dex::parse(data) { Ok(dex) => Ok(dex.into()), Err(_) => { @@ -137,7 +138,7 @@ fn contains_method( Err(_) => return None, }; - Some(dex.methods.binary_search_by(|item| item.name.cmp(&str)).is_ok()) + Some(dex.methods.iter().any(|item| item.name.as_deref() == str.as_deref())) } /// Function that checks whether the DEX file contains the specified class diff --git a/lib/src/modules/dex/tests/mod.rs b/lib/src/modules/dex/tests/mod.rs index 8af4f315a..63bf6af9a 100644 --- a/lib/src/modules/dex/tests/mod.rs +++ b/lib/src/modules/dex/tests/mod.rs @@ -91,7 +91,8 @@ fn methods() { import "dex" rule test { condition: - dex.contains_method("getPackageName") + dex.contains_method("getPackageName") and + dex.contains_method("loadLibrary") } "#, &dex