Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions lib/src/modules/protos/test_proto2.proto
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,9 @@ message TestProto2 {
help: "use `foo` instead",
replacement: "foo"
}];

// The metadata received by the module is copied into this field.
optional bytes metadata = 502;
}

enum TopLevelEnumeration {
Expand Down
4 changes: 3 additions & 1 deletion lib/src/modules/test_proto2/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ fn to_int(ctx: &ScanContext, string: RuntimeString) -> Option<i64> {
}

#[module_main]
fn main(data: &[u8], _meta: Option<&[u8]>) -> Result<TestProto2, ModuleError> {
fn main(data: &[u8], meta: Option<&[u8]>) -> Result<TestProto2, ModuleError> {
let mut test = TestProto2::new();

test.set_int32_zero(0);
Expand Down Expand Up @@ -178,5 +178,7 @@ fn main(data: &[u8], _meta: Option<&[u8]>) -> Result<TestProto2, ModuleError> {

test.set_timestamp(1748591440);

test.metadata = meta.map(Vec::from);

Ok(test)
}
91 changes: 90 additions & 1 deletion py/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ matches = rules.scan(b'some dummy data')

#![deny(missing_docs)]
use std::borrow::Cow;
use std::collections::HashMap;
use std::io::{Read, Write};
use std::marker::PhantomPinned;
use std::ops::Deref;
Expand All @@ -38,7 +39,6 @@ use pyo3::{create_exception, IntoPyObjectExt};
use strum_macros::{Display, EnumString};

use ::yara_x as yrx;

use yara_x_fmt::Indentation;

fn dict_to_json(dict: Bound<PyAny>) -> PyResult<serde_json::Value> {
Expand Down Expand Up @@ -615,6 +615,55 @@ impl Compiler {
}
}

/// Optional information for the scan operation.
///
/// Holds the per-module metadata that is handed over to the scanner when one
/// of the `*_with_options` scan methods is used.
#[pyclass]
struct ScanOptions {
/// Raw metadata bytes keyed by the name of the module they are intended for.
module_metadata: HashMap<String, Vec<u8>>,
}

impl<'a> From<&'a ScanOptions> for yrx::ScanOptions<'a> {
    /// Builds the native scan options from the Python-facing ones.
    ///
    /// Every `(module name, metadata)` entry stored in `options` is
    /// registered on a fresh [`yrx::ScanOptions`]. Names and metadata are
    /// passed as borrows of `options`, hence the shared lifetime `'a`.
    fn from(options: &'a ScanOptions) -> Self {
        options.module_metadata.iter().fold(
            yrx::ScanOptions::new(),
            |native, (name, bytes)| {
                native.set_module_metadata(name.as_str(), bytes.as_slice())
            },
        )
    }
}

#[pymethods]
impl ScanOptions {
    /// Creates a new [`ScanOptions`] with no module metadata set.
    #[new]
    fn new() -> Self {
        let module_metadata = HashMap::new();
        Self { module_metadata }
    }

    /// Sets the data associated with a YARA module.
    ///
    /// Some YARA modules need extra information that cannot be found in the
    /// scanned file itself. The `cuckoo` module, for example, may need the
    /// report produced by Cuckoo sandbox for the file being scanned.
    ///
    /// This method is how that information is supplied. The format of the
    /// data depends on the module: it is handed over as raw bytes and the
    /// module is responsible for decoding it.
    ///
    /// Setting metadata again for the same module replaces the data that was
    /// previously stored for it.
    fn set_module_metadata(
        &mut self,
        module: &str,
        metadata: Bound<PyBytes>,
    ) -> PyResult<()> {
        // Copy the bytes out of the Python object so that the options own
        // their data independently of the Python `bytes` instance.
        let bytes: Vec<u8> = metadata.extract()?;
        self.module_metadata.insert(module.to_owned(), bytes);
        Ok(())
    }
}

/// Scans data with already compiled YARA rules.
///
/// The scanner receives a set of compiled Rules and scans data with those
Expand Down Expand Up @@ -734,11 +783,37 @@ impl Scanner {
Python::attach(|py| scan_results_to_py(py, results))
}

/// Like [`Scanner::scan`], but allows specifying additional scan options.
///
/// The given `options` are converted into the scanner's native options
/// before the scan starts. Scan errors are translated with `map_scan_err`.
fn scan_with_options(
    &mut self,
    data: &[u8],
    options: &ScanOptions,
) -> PyResult<Py<ScanResults>> {
    let native_options: yrx::ScanOptions = options.into();
    let results = self
        .inner
        .scan_with_options(data, native_options)
        .map_err(map_scan_err)?;
    Python::attach(|py| scan_results_to_py(py, results))
}

/// Scans the file found at `path`.
///
/// Scan errors are translated with `map_scan_err`; on success the results
/// are converted into a Python `ScanResults` object.
fn scan_file(&mut self, path: PathBuf) -> PyResult<Py<ScanResults>> {
    let outcome = self.inner.scan_file(path);
    let results = outcome.map_err(map_scan_err)?;
    Python::attach(|py| scan_results_to_py(py, results))
}

/// Like [`Scanner::scan_file`], but allows specifying additional scan options.
///
/// The given `options` are converted into the scanner's native options
/// before the scan starts. Scan errors are translated with `map_scan_err`.
fn scan_file_with_options(
    &mut self,
    path: PathBuf,
    options: &ScanOptions,
) -> PyResult<Py<ScanResults>> {
    let native_options: yrx::ScanOptions = options.into();
    let results = self
        .inner
        .scan_file_with_options(path, native_options)
        .map_err(map_scan_err)?;
    Python::attach(|py| scan_results_to_py(py, results))
}
}

/// Results produced by a scan operation.
Expand Down Expand Up @@ -923,6 +998,19 @@ impl Rules {
Python::attach(|py| scan_results_to_py(py, results))
}

/// Scans in-memory data with these rules, using the given scan options.
///
/// A new scanner is created for this call. Any scan error is reported as a
/// `ScanError` carrying the error's message.
fn scan_with_options(
    &self,
    data: &[u8],
    options: &ScanOptions,
) -> PyResult<Py<ScanResults>> {
    let native_options: yrx::ScanOptions = options.into();
    let mut scanner = yrx::Scanner::new(&self.inner.rules);
    let results = scanner
        .scan_with_options(data, native_options)
        .map_err(|err| ScanError::new_err(err.to_string()))?;
    Python::attach(|py| scan_results_to_py(py, results))
}

/// Serializes the rules into a file-like object.
fn serialize_into(&self, file: Py<PyAny>) -> PyResult<()> {
self.inner
Expand Down Expand Up @@ -1207,6 +1295,7 @@ fn yara_x(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_function(wrap_pyfunction!(module_names, m)?)?;
m.add_class::<Rules>()?;
m.add_class::<Scanner>()?;
m.add_class::<ScanOptions>()?;
m.add_class::<ScanResults>()?;
m.add_class::<Compiler>()?;
m.add_class::<Rule>()?;
Expand Down
12 changes: 11 additions & 1 deletion py/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,17 @@ def test_scanner_max_matches_per_pattern():
assert len(matching_rules) == 1


def test_scan_options():
    """Metadata set through ScanOptions must be echoed by `test_proto2`."""
    # Nothing to check when the `test_proto2` module is not available.
    if 'test_proto2' not in yara_x.module_names():
        return

    metadata = b'foo bar baz'
    scan_options = yara_x.ScanOptions()
    scan_options.set_module_metadata('test_proto2', metadata)

    rules = yara_x.compile('import "test_proto2" rule foo {condition: false}')
    results = rules.scan_with_options(b'', scan_options)
    assert results.module_outputs['test_proto2']['metadata'] == metadata


def test_module_outputs():
if 'test_proto2' not in yara_x.module_names():
return
Expand All @@ -244,7 +255,6 @@ def test_module_outputs():
assert module_outputs['test_proto2']['bytes_raw'] == b'\xfcH\x83\xe4\xf0\xeb3]\x8bE\x00H'
assert module_outputs['test_proto2']['timestamp'] == datetime.datetime(2025, 5, 30, 7, 50, 40, tzinfo=datetime.timezone.utc)


def test_ignored_modules():
compiler = yara_x.Compiler()
compiler.ignore_module("unsupported_module")
Expand Down
Loading