diff --git a/Cargo.lock b/Cargo.lock index e7f290452..51dfefdaa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -436,9 +436,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" [[package]] name = "bzip2" diff --git a/lib/src/modules/protos/test_proto2.proto b/lib/src/modules/protos/test_proto2.proto index 976623d66..2bc19e072 100644 --- a/lib/src/modules/protos/test_proto2.proto +++ b/lib/src/modules/protos/test_proto2.proto @@ -219,6 +219,9 @@ message TestProto2 { help: "use `foo` instead", replacement: "foo" }]; + + // The metadata received by the module is copied into this field. + optional bytes metadata = 502; } enum TopLevelEnumeration { diff --git a/lib/src/modules/test_proto2/mod.rs b/lib/src/modules/test_proto2/mod.rs index 849360d8f..c5ee41e2a 100644 --- a/lib/src/modules/test_proto2/mod.rs +++ b/lib/src/modules/test_proto2/mod.rs @@ -86,7 +86,7 @@ fn to_int(ctx: &ScanContext, string: RuntimeString) -> Option { } #[module_main] -fn main(data: &[u8], _meta: Option<&[u8]>) -> Result { +fn main(data: &[u8], meta: Option<&[u8]>) -> Result { let mut test = TestProto2::new(); test.set_int32_zero(0); @@ -178,5 +178,7 @@ fn main(data: &[u8], _meta: Option<&[u8]>) -> Result { test.set_timestamp(1748591440); + test.metadata = meta.map(Vec::from); + Ok(test) } diff --git a/py/src/lib.rs b/py/src/lib.rs index 088169120..03916a3ed 100644 --- a/py/src/lib.rs +++ b/py/src/lib.rs @@ -15,6 +15,7 @@ matches = rules.scan(b'some dummy data') #![deny(missing_docs)] use std::borrow::Cow; +use std::collections::HashMap; use std::io::{Read, Write}; use std::marker::PhantomPinned; use std::ops::Deref; @@ -38,7 +39,6 @@ use pyo3::{create_exception, IntoPyObjectExt}; use 
strum_macros::{Display, EnumString}; use ::yara_x as yrx; - use yara_x_fmt::Indentation; fn dict_to_json(dict: Bound) -> PyResult { @@ -615,6 +615,55 @@ impl Compiler { } } +/// Optional information for the scan operation. +#[pyclass] +struct ScanOptions { + module_metadata: HashMap>, +} + +impl<'a> From<&'a ScanOptions> for yrx::ScanOptions<'a> { + fn from(options: &'a ScanOptions) -> Self { + let mut result = yrx::ScanOptions::new(); + for (module_name, metadata) in &options.module_metadata { + result = result.set_module_metadata( + module_name.as_str(), + metadata.as_slice(), + ); + } + result + } +} + +#[pymethods] +impl ScanOptions { + /// Creates a new [`ScanOptions`]. + #[new] + fn new() -> Self { + Self { module_metadata: HashMap::new() } + } + + /// Sets the data associated with a YARA module. + /// + /// When scanning a file, YARA modules may require additional data that is + /// not present in the file itself. For instance, the `cuckoo` module may + /// need a report from Cuckoo sandbox with information about the file being + /// scanned. + /// + /// This function is used for providing that data to the modules. The data + /// is specific to the module, and each module expects a different data + /// structure. The data is passed as raw bytes that the module is responsible + /// for decoding accordingly. + fn set_module_metadata( + &mut self, + module: &str, + metadata: Bound, + ) -> PyResult<()> { + let metadata = metadata.extract::>()?; + self.module_metadata.insert(module.to_string(), metadata); + Ok(()) + } +} + /// Scans data with already compiled YARA rules. /// /// The scanner receives a set of compiled Rules and scans data with those @@ -734,11 +783,37 @@ impl Scanner { Python::attach(|py| scan_results_to_py(py, results)) } + /// Like [`Scanner::scan`], but allows specifying additional scan options. 
+ fn scan_with_options( + &mut self, + data: &[u8], + options: &ScanOptions, + ) -> PyResult> { + let results = self + .inner + .scan_with_options(data, yrx::ScanOptions::from(options)) + .map_err(map_scan_err)?; + Python::attach(|py| scan_results_to_py(py, results)) + } + /// Scans a file. fn scan_file(&mut self, path: PathBuf) -> PyResult> { let results = self.inner.scan_file(path).map_err(map_scan_err)?; Python::attach(|py| scan_results_to_py(py, results)) } + + /// Like [`Scanner::scan_file`], but allows specifying additional scan options. + fn scan_file_with_options( + &mut self, + path: PathBuf, + options: &ScanOptions, + ) -> PyResult> { + let results = self + .inner + .scan_file_with_options(path, yrx::ScanOptions::from(options)) + .map_err(map_scan_err)?; + Python::attach(|py| scan_results_to_py(py, results)) + } } /// Results produced by a scan operation. @@ -923,6 +998,19 @@ impl Rules { Python::attach(|py| scan_results_to_py(py, results)) } + /// Like [`Rules::scan`], but allows specifying additional scan options. + fn scan_with_options( + &self, + data: &[u8], + options: &ScanOptions, + ) -> PyResult> { + let mut scanner = yrx::Scanner::new(&self.inner.rules); + let results = scanner + .scan_with_options(data, yrx::ScanOptions::from(options)) + .map_err(|err| ScanError::new_err(err.to_string()))?; + Python::attach(|py| scan_results_to_py(py, results)) + } + /// Serializes the rules into a file-like object. 
fn serialize_into(&self, file: Py) -> PyResult<()> { self.inner @@ -1207,6 +1295,7 @@ fn yara_x(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(module_names, m)?)?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/py/tests/test_api.py b/py/tests/test_api.py index 1e72d29d7..587162b4f 100644 --- a/py/tests/test_api.py +++ b/py/tests/test_api.py @@ -232,6 +232,17 @@ def test_scanner_max_matches_per_pattern(): assert len(matching_rules) == 1 +def test_scan_options(): + if 'test_proto2' not in yara_x.module_names(): + return + + rules = yara_x.compile('import "test_proto2" rule foo {condition: false}') + options = yara_x.ScanOptions() + options.set_module_metadata('test_proto2', b'foo bar baz') + module_outputs = rules.scan_with_options(b'', options).module_outputs + assert module_outputs['test_proto2']['metadata'] == b'foo bar baz' + + def test_module_outputs(): if 'test_proto2' not in yara_x.module_names(): return @@ -244,7 +255,6 @@ def test_module_outputs(): assert module_outputs['test_proto2']['bytes_raw'] == b'\xfcH\x83\xe4\xf0\xeb3]\x8bE\x00H' assert module_outputs['test_proto2']['timestamp'] == datetime.datetime(2025, 5, 30, 7, 50, 40, tzinfo=datetime.timezone.utc) - def test_ignored_modules(): compiler = yara_x.Compiler() compiler.ignore_module("unsupported_module")