Skip to content

Commit cd80efa

Browse files
wxsBSD and plusvic authored
feat: expose linter API of compiler to python bindings. (VirusTotal#612)
This commit exposes the linters to the python API via the Compiler class. With this you can now do things like: ``` import yara_x rule = ''' rule test : a b c { meta: author = "foo" strings: $a = "bar" condition: $a }''' c = yara_x.Compiler() c.allowed_metadata('author', yara_x.MetaType.STRING, regexp='bar') c.allowed_tags(['a', 'b'], error = True) ``` --------- Co-authored-by: Victor M. Alvarez <vmalvarez@virustotal.com>
1 parent 81fd409 commit cd80efa

File tree

5 files changed

+275
-7
lines changed

5 files changed

+275
-7
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

py/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,15 @@ pyo3 = { version = "0.28.2", features = [
6060
"abi3-py38",
6161
"extension-module",
6262
] }
63+
regex = { workspace = true }
6364
serde_json = { workspace = true }
6465
strum = { workspace = true }
6566
strum_macros = { workspace = true }
6667

6768
yara-x = { workspace = true }
6869
yara-x-proto-json = { workspace = true }
6970
yara-x-fmt = { workspace = true }
71+
yara-x-parser = { workspace = true }
7072

7173
[build-dependencies]
7274
pyo3-build-config = "0.28.2"

py/src/lib.rs

Lines changed: 162 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ use strum_macros::{Display, EnumString};
4040

4141
use ::yara_x as yrx;
4242
use yara_x_fmt::Indentation;
43+
use yara_x_parser::ast::MetaValue;
4344

4445
fn dict_to_json(dict: Bound<PyAny>) -> PyResult<serde_json::Value> {
4546
static JSON_DUMPS: PyOnceLock<Py<PyAny>> = PyOnceLock::new();
@@ -72,6 +73,36 @@ enum SupportedModules {
7273
Dex,
7374
}
7475

76+
// These are copies from the checker in the CLI, but exposing them in the API
77+
// for use here seems wrong. Maybe move them to a better place or just keep our
78+
// own copies here?
79+
/// Returns `true` when `s` looks like a hex-encoded SHA-256 digest, i.e. it
/// consists of exactly 64 ASCII hexadecimal digits (upper or lower case).
fn is_sha256(s: &str) -> bool {
    const SHA256_HEX_LEN: usize = 64;
    // Any non-ASCII character contributes bytes >= 0x80, which are never hex
    // digits, so checking bytes is equivalent to checking chars.
    s.len() == SHA256_HEX_LEN && s.bytes().all(|b| b.is_ascii_hexdigit())
}
82+
83+
/// Returns `true` when `s` looks like a hex-encoded SHA-1 digest, i.e. it
/// consists of exactly 40 ASCII hexadecimal digits (upper or lower case).
fn is_sha1(s: &str) -> bool {
    const SHA1_HEX_LEN: usize = 40;
    // Any non-ASCII character contributes bytes >= 0x80, which are never hex
    // digits, so checking bytes is equivalent to checking chars.
    s.len() == SHA1_HEX_LEN && s.bytes().all(|b| b.is_ascii_hexdigit())
}
86+
87+
/// Returns `true` when `s` looks like a hex-encoded MD5 digest, i.e. it
/// consists of exactly 32 ASCII hexadecimal digits (upper or lower case).
fn is_md5(s: &str) -> bool {
    const MD5_HEX_LEN: usize = 32;
    // Any non-ASCII character contributes bytes >= 0x80, which are never hex
    // digits, so checking bytes is equivalent to checking chars.
    s.len() == MD5_HEX_LEN && s.bytes().all(|b| b.is_ascii_hexdigit())
}
90+
91+
/// Supported metadata types used to add linters to the compiler.
///
/// Each variant selects the check that `Compiler::allowed_metadata` installs
/// for a metadata entry. The variant names are upper-case (hence the clippy
/// allowance below) so that they read as `yara_x.MetaType.STRING`, etc. from
/// Python.
92+
#[pyclass(from_py_object)]
93+
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
94+
#[allow(clippy::upper_case_acronyms)]
95+
enum MetaType {
96+
/// A string or bytes value, optionally constrained by a regular expression.
STRING,
97+
/// An integer value.
INTEGER,
98+
/// A floating point value.
FLOAT,
99+
/// A boolean value.
BOOL,
100+
/// A string made of exactly 64 hexadecimal digits.
SHA256,
101+
/// A string made of exactly 40 hexadecimal digits.
SHA1,
102+
/// A string made of exactly 32 hexadecimal digits.
MD5,
103+
/// A string that is a valid MD5, SHA-1 or SHA-256 (see above).
HASH,
104+
}
105+
75106
/// Formats YARA rules.
76107
#[pyclass(unsendable)]
77108
struct Formatter {
@@ -420,10 +451,17 @@ impl Compiler {
420451
/// will return an InvalidRuleName warning.
421452
///
422453
/// If the regexp does not compile a ValueError is returned.
423-
#[pyo3(signature = (regexp))]
424-
fn rule_name_regexp(&mut self, regexp: &str) -> PyResult<()> {
425-
let linter = yrx::linters::rule_name(regexp)
426-
.map_err(|err| PyValueError::new_err(err.to_string()))?;
454+
#[pyo3(signature = (regexp, error = false))]
455+
fn allowed_rule_name(
456+
&mut self,
457+
regexp: &str,
458+
error: bool,
459+
) -> PyResult<()> {
460+
let mut linter = match yrx::linters::rule_name(regexp) {
461+
Ok(linter) => linter,
462+
Err(err) => return Err(PyValueError::new_err(err.to_string())),
463+
};
464+
linter = linter.error(error);
427465
self.inner.add_linter(linter);
428466
Ok(())
429467
}
@@ -616,6 +654,125 @@ impl Compiler {
616654
.map_err(|err| PyValueError::new_err(err.to_string()))?;
617655
json_loads.call((warnings_json,), None)
618656
}
657+
658+
#[pyo3(signature = (tags, error = false))]
659+
fn allowed_tags(
660+
&mut self,
661+
tags: Vec<String>,
662+
error: bool,
663+
) -> PyResult<()> {
664+
self.inner.add_linter(yrx::linters::tags_allowed(tags).error(error));
665+
Ok(())
666+
}
667+
668+
#[pyo3(signature = (regexp, error = false))]
669+
fn allowed_tags_regex(
670+
&mut self,
671+
regexp: String,
672+
error: bool,
673+
) -> PyResult<()> {
674+
let mut linter = match yrx::linters::tag_regex(regexp) {
675+
Ok(linter) => linter,
676+
Err(err) => return Err(PyValueError::new_err(err.to_string())),
677+
};
678+
linter = linter.error(error);
679+
self.inner.add_linter(linter);
680+
Ok(())
681+
}
682+
683+
#[pyo3(signature = (
684+
identifier,
685+
value_type,
686+
required = false,
687+
error = false,
688+
regexp = None
689+
))]
690+
fn allowed_metadata(
691+
&mut self,
692+
identifier: &str,
693+
value_type: MetaType,
694+
required: bool,
695+
error: bool,
696+
regexp: Option<String>,
697+
) -> PyResult<()> {
698+
let mut linter =
699+
yrx::linters::metadata(identifier).required(required).error(error);
700+
match value_type {
701+
MetaType::STRING => {
702+
let message = if let Some(regexp) = regexp.clone() {
703+
let _ = regex::bytes::Regex::new(regexp.as_str())
704+
.map_err(|err| PyValueError::new_err(err.to_string()));
705+
format!(
706+
"`{identifier}` must be a string that matches `/{regexp}/`"
707+
)
708+
} else {
709+
format!("`{identifier}` must be a string")
710+
};
711+
linter = linter.validator(
712+
move |meta| match (&meta.value, &regexp) {
713+
(MetaValue::String((s, _)), Some(regexp)) => {
714+
regex::Regex::new(regexp.as_str())
715+
.unwrap()
716+
.is_match(s)
717+
}
718+
(MetaValue::Bytes((s, _)), Some(regexp)) => {
719+
regex::bytes::Regex::new(regexp.as_str())
720+
.unwrap()
721+
.is_match(s)
722+
}
723+
(MetaValue::String(_), None) => true,
724+
(MetaValue::Bytes(_), None) => true,
725+
_ => false,
726+
},
727+
message,
728+
);
729+
}
730+
MetaType::INTEGER => {
731+
linter = linter.validator(
732+
|meta| matches!(meta.value, MetaValue::Integer(_)),
733+
format!("`{identifier}` must be an integer"),
734+
);
735+
}
736+
MetaType::FLOAT => {
737+
linter = linter.validator(
738+
|meta| matches!(meta.value, MetaValue::Float(_)),
739+
format!("`{identifier}` must be a float"),
740+
);
741+
}
742+
MetaType::BOOL => {
743+
linter = linter.validator(
744+
|meta| matches!(meta.value, MetaValue::Bool(_)),
745+
format!("`{identifier}` must be a bool"),
746+
);
747+
}
748+
MetaType::SHA256 => {
749+
linter = linter.validator(
750+
|meta| matches!(meta.value, MetaValue::String((s,_)) if is_sha256(s)),
751+
format!("`{identifier}` must be a SHA-256"),
752+
);
753+
}
754+
MetaType::SHA1 => {
755+
linter = linter.validator(
756+
|meta| matches!(meta.value, MetaValue::String((s,_)) if is_sha1(s)),
757+
format!("`{identifier}` must be a SHA-1"),
758+
);
759+
}
760+
MetaType::MD5 => {
761+
linter = linter.validator(
762+
|meta| matches!(meta.value, MetaValue::String((s,_)) if is_md5(s)),
763+
format!("`{identifier}` must be a MD5"),
764+
);
765+
}
766+
MetaType::HASH => {
767+
linter = linter.validator(
768+
|meta| matches!(meta.value, MetaValue::String((s,_)) if is_md5(s) || is_sha1(s) || is_sha256(s)),
769+
format!("`{identifier}` must be a MD5, SHA-1 or SHA-256"),
770+
);
771+
}
772+
}
773+
self.inner.add_linter(linter);
774+
Ok(())
775+
}
619776
}
620777

621778
/// Optional information for the scan operation.
@@ -1306,6 +1463,7 @@ fn yara_x(m: &Bound<'_, PyModule>) -> PyResult<()> {
13061463
m.add_class::<Match>()?;
13071464
m.add_class::<Formatter>()?;
13081465
m.add_class::<Module>()?;
1466+
m.add_class::<MetaType>()?;
13091467
m.gil_used(false)?;
13101468
Ok(())
13111469
}

py/tests/test_api.py

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def test_error_on_slow_pattern():
3232
def test_invalid_rule_name_regexp():
3333
compiler = yara_x.Compiler()
3434
with pytest.raises(ValueError):
35-
compiler.rule_name_regexp("(AXS|ERS")
35+
compiler.allowed_rule_name("(AXS|ERS")
3636

3737

3838
def test_int_globals():
@@ -397,3 +397,60 @@ def test_rules_imports():
397397
}
398398
''')
399399
assert rules.imports() == ["pe", "elf"]
400+
401+
def test_check_allowed_tags_error():
    """Tags outside the allowed list abort compilation when `error=True`."""
    source = '''
rule test: a b c d { condition: 1 + 1 == 2}
rule test2: d { condition: 1 + 1 == 2}'''
    compiler = yara_x.Compiler()
    compiler.allowed_tags(['a', 'b'], error = True)
    with pytest.raises(yara_x.CompileError,
                       match="tag `c` not in allowed list"):
        compiler.add_source(source)
    # The current behavior is to stop checking tags on a rule after its first
    # failing tag, while subsequent rules are still checked. Hence one error
    # per rule: `c` for the first rule and `d` for the second.
    reported = [error['text'] for error in compiler.errors()]
    assert len(reported) == 2
    assert 'tag `c` not in allowed list' in reported[0]
    assert 'tag `d` not in allowed list' in reported[1]
416+
417+
def test_check_allowed_tags_warning():
    """By default, tags outside the allowed list only produce warnings."""
    compiler = yara_x.Compiler()
    compiler.allowed_tags(['a', 'b'])
    compiler.add_source('rule test: a b c d { condition: 1 + 1 == 2}')
    reported = [warning['text'] for warning in compiler.warnings()]
    assert len(reported) == 2
    assert 'tag `c` not in allowed list' in reported[0]
    assert 'tag `d` not in allowed list' in reported[1]
425+
426+
def test_check_metadata():
    """Metadata with the wrong type or a non-matching value is reported."""
    compiler = yara_x.Compiler()
    compiler.allowed_metadata('a', yara_x.MetaType.STRING)
    compiler.allowed_metadata('b', yara_x.MetaType.STRING, regexp='^bar')
    compiler.add_source('rule test { meta: a = 1 b = "foo" condition: 1 + 1 == 2}')
    expected = [
        '`a` must be a string',
        '`b` must be a string that matches `/^bar/`',
    ]
    reported = [warning['text'] for warning in compiler.warnings()]
    assert len(reported) == 2
    for fragment, text in zip(expected, reported):
        assert fragment in text
435+
436+
def test_check_rule_name_regexp():
    """Rule names not matching the regexp produce one warning per rule."""
    rule = '''
rule test { condition: 1 + 1 == 2}
rule test2 { condition: 1 + 1 == 2}'''
    compiler = yara_x.Compiler()
    compiler.allowed_rule_name('^foo')
    compiler.add_source(rule)
    warnings = compiler.warnings()
    assert len(warnings) == 2
    # Neither `test` nor `test2` matches `^foo`, so both warnings must carry
    # the rule-name message, not just the first one.
    for warning in warnings:
        assert 'this rule name does not match regex `^foo`' in warning['text']
446+
447+
def test_check_rule_name_regexp_error():
    """With `error=True`, non-matching rule names abort compilation."""
    source = '''
rule test { condition: 1 + 1 == 2}
rule test2 { condition: 1 + 1 == 2}'''
    compiler = yara_x.Compiler()
    compiler.allowed_rule_name('^foo', error = True)
    expected_message = r"this rule name does not match regex `\^foo`"
    with pytest.raises(yara_x.CompileError, match=expected_message):
        compiler.add_source(source)
    # One error per offending rule.
    error_count = len(compiler.errors())
    assert error_count == 2

py/yara_x.pyi

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import collections
22

3-
from typing import Any, Dict, BinaryIO, TextIO, Optional, Tuple, final
3+
from typing import Any, Dict, BinaryIO, TextIO, Optional, Tuple, final, List
4+
from enum import Enum
45

56
class CompileError(Exception):
67
r"""
@@ -156,7 +157,7 @@ class Compiler:
156157
"""
157158
...
158159

159-
def rule_name_regexp(self, regexp: str) -> None:
160+
def allowed_rule_name(self, regexp: str, error: bool = False) -> None:
160161
r"""
161162
Tell the compiler that any rule must match this regular expression or it
162163
will result in a compiler warning.
@@ -168,6 +169,24 @@ class Compiler:
168169
"""
169170
...
170171

172+
def allowed_tags(self, tags: List[str], error: bool = False) -> None:
173+
r"""
List the allowed tags for rules.

Any tag that is not in `tags` is reported as a warning, or as an error
when `error` is True.
"""
174+
...
175+
176+
def allowed_tags_regex(self, regexp: str, error: bool = False) -> None:
177+
r"""
A regular expression that must match all tags on rules.

`error` is forwarded to the linter; presumably, as with `allowed_tags`,
it turns violations into errors instead of warnings.

Raises ValueError if `regexp` does not compile.
"""
178+
...
179+
180+
def allowed_metadata(
    self,
    identifier: str,
    value_type: MetaType,
    required: bool = False,
    error: bool = False,
    regexp: Optional[str] = None) -> None:
    r"""
    Define expected type and value for metadata on rules.

    `identifier` is the name of the metadata entry and `value_type` the
    kind of value it must hold. `required` and `error` are forwarded to
    the underlying linter; by default failed checks are reported as
    warnings.

    `regexp` is only used when `value_type` is `MetaType.STRING`, in
    which case the value must match the regular expression.
    """
    ...
189+
171190
@final
172191
class ScanOptions:
173192
r"""
@@ -480,3 +499,33 @@ class Module:
480499
def invoke(self, data: str) -> Any:
481500
r"""Parse the data and collect module metadata."""
482501
...
502+
503+
@final
504+
class MetaType(Enum):
r"""Kinds of metadata values that `Compiler.allowed_metadata` can enforce."""
505+
# A string (or bytes) value, optionally constrained by a regexp.
STRING: int
506+
INTEGER: int
507+
FLOAT: int
508+
BOOL: int
509+
# A string of exactly 64 hexadecimal digits.
SHA256: int
510+
# A string of exactly 40 hexadecimal digits.
SHA1: int
511+
# A string of exactly 32 hexadecimal digits.
MD5: int
512+
# A string that is a valid MD5, SHA-1 or SHA-256.
HASH: int
513+
514+
@final
class CheckResult:
    r"""Result from the [`Compiler::check`] method after checking source code."""

    def warning(self) -> bool:
        r"""True if the result is a warning, false if it is an error."""
        ...

    def code(self) -> str:
        r"""The string representation of the result code."""
        ...

    def title(self) -> str:
        r"""The title of the result code."""
        ...

    def message(self) -> str:
        r"""A multi-line message containing code, title and full compiler details."""
        ...

0 commit comments

Comments
 (0)