Skip to content

Commit b0486ff

Browse files
wxsBSDplusvic
andauthored
feat: add "deps" command to generate a graph of rule dependencies. (VirusTotal#498)
Given a set of rules, parse them and walk the AST to find identifiers, then generate an ASCII tree that shows the modules and other rules that each rule depends on. By default it generates a graph of all the rules, but you can select any number of rules with the -r argument. For example, given these rules: ``` rule a { condition: pe.is_dll() } rule b { condition: a } rule c { condition: b } rule d { condition: false } ``` And selecting using `-r b` you get output that looks like this: ``` a └─ mod: pe b └─ a └─ mod: pe c └─ b └─ a └─ mod: pe d ``` --------- Co-authored-by: Victor M. Alvarez <vmalvarez@virustotal.com>
1 parent b8083fa commit b0486ff

7 files changed

Lines changed: 654 additions & 0 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cli/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ rules-profiling = ["yara-x/rules-profiling"]
4646

4747
[dependencies]
4848
anyhow = { workspace = true }
49+
ascii_tree = { workspace = true }
4950
clap = { workspace = true, features = ["cargo", "derive"] }
5051
clap_complete = { workspace = true }
5152
figment = { workspace = true, features = ["toml"] }

cli/src/commands/deps.rs

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
use std::collections::{BTreeMap, HashSet};
2+
use std::fs;
3+
use std::path::PathBuf;
4+
5+
use ::ascii_tree::write_tree;
6+
use ::ascii_tree::Tree;
7+
use ::ascii_tree::Tree::Node;
8+
use anyhow::{bail, Context};
9+
use clap::{arg, value_parser, ArgAction, ArgMatches, Command};
10+
11+
use yara_x_parser::ast::dfs::{DFSContext, DFSEvent, DFSIter};
12+
use yara_x_parser::ast::{Expr, AST};
13+
use yara_x_parser::Parser;
14+
15+
/// Dependencies collected for a single rule: the other rules and the
/// modules that its condition refers to. The names are borrowed string
/// slices with lifetime `'a`.
#[derive(Debug, Default)]
16+
struct Deps<'a> {
17+
/// Names of the rules this rule depends on.
rules: HashSet<&'a str>,
18+
/// Names of the modules this rule depends on.
modules: HashSet<&'a str>,
19+
}
20+
21+
pub fn deps() -> Command {
22+
super::command("deps")
23+
.about("Show rule dependencies and modules")
24+
// The `deps` command is not ready yet.
25+
.hide(true)
26+
.arg(
27+
arg!(<RULES_PATH>)
28+
.help("Path to YARA source file")
29+
.value_parser(value_parser!(PathBuf)),
30+
)
31+
.arg(
32+
arg!(-r - -"rule")
33+
.required(false)
34+
.help("Rules to display dependency information for")
35+
.action(ArgAction::Append),
36+
)
37+
}
38+
39+
pub fn exec_deps(args: &ArgMatches) -> anyhow::Result<()> {
40+
let rules_path = args.get_one::<PathBuf>("RULES_PATH").unwrap();
41+
let requested_rules = args.get_many::<String>("rule");
42+
43+
let requested_rules: Vec<_> = requested_rules
44+
.map_or(Vec::new(), |v| v.collect())
45+
.into_iter()
46+
.map(|v| v.as_str())
47+
.collect();
48+
49+
let src = fs::read(rules_path)
50+
.with_context(|| format!("can not read `{}`", rules_path.display()))?;
51+
52+
let parser = Parser::new(src.as_slice());
53+
let ast: AST = parser.into();
54+
55+
if !ast.errors().is_empty() {
56+
for err in ast.errors().iter() {
57+
println!("{err:?}");
58+
}
59+
bail!("{} syntax error(s) found", ast.errors().len());
60+
}
61+
62+
// Map of rules to dependencies and modules they use.
63+
//
64+
// Given these rules:
65+
//
66+
// rule a { condition: pe.is_dll() }
67+
// rule b { condition: a or x }
68+
//
69+
// Deps would look like:
70+
//
71+
// {
72+
// "a": Deps { rules: {}, modules: {"pe"} },
73+
// "b": Deps { rules: {"a"}, modules: {} }
74+
// }
75+
//
76+
// The unknown identifier "x" is silently ignored.
77+
let mut dep_map: BTreeMap<&str, Deps> = BTreeMap::new();
78+
79+
for rule in ast.rules() {
80+
if dep_map.insert(rule.identifier.name, Deps::default()).is_some() {
81+
bail!("Duplicate rule \"{}\" found", rule.identifier.name);
82+
};
83+
find_dependencies(&rule.condition, rule.identifier.name, &mut dep_map);
84+
}
85+
86+
let dep_tree = generate_dep_tree(&dep_map, &requested_rules);
87+
88+
for dep in dep_tree.iter() {
89+
let mut output = String::new();
90+
write_tree(&mut output, &dep)?;
91+
println!("{output}");
92+
}
93+
94+
Ok(())
95+
}
96+
97+
fn generate_dep_tree(
98+
dep_map: &BTreeMap<&str, Deps>,
99+
requested_rules: &Vec<&str>,
100+
) -> Vec<Tree> {
101+
let mut nodes: Vec<Tree> = Vec::new();
102+
103+
for (rule, deps) in dep_map.iter() {
104+
if requested_rules.is_empty() || requested_rules.contains(rule) {
105+
nodes.push(tree_for_rule(rule, &deps, &dep_map));
106+
}
107+
}
108+
109+
nodes
110+
}
111+
112+
fn tree_for_rule(
113+
rule: &str,
114+
deps: &Deps,
115+
dep_map: &BTreeMap<&str, Deps>,
116+
) -> Tree {
117+
let mut nodes: Vec<Tree> = Vec::new();
118+
119+
for module in deps.modules.iter() {
120+
nodes.push(Node(format!("mod: {module}"), vec![]));
121+
}
122+
123+
for dep in deps.rules.iter() {
124+
match dep_map.get(dep) {
125+
Some(new_deps) => {
126+
nodes.push(tree_for_rule(dep, new_deps, dep_map));
127+
}
128+
None => {
129+
nodes.push(Node(dep.to_string(), vec![]));
130+
}
131+
}
132+
}
133+
134+
Node(rule.to_string(), nodes)
135+
}
136+
137+
fn find_dependencies<'a>(
138+
expr: &'a Expr<'a>,
139+
rule_name: &'a str,
140+
dep_map: &mut BTreeMap<&'a str, Deps<'a>>,
141+
) {
142+
// Contains the variables that are currently defined. This acts
143+
// as a stack where the variables defined by the innermost `for`
144+
// or `with` statements are at top of the array.
145+
let mut variables = Vec::new();
146+
// The `scopes` array contains the indexes within the `variables`
147+
// array where a scope start. For instance, if we have two nested
148+
// `with` statements where the outermost one defines variables `a`
149+
// and `b`, while the innermost defines variables `c` and `d`, the
150+
// `variables` vector will contain [`a`, `b`, `c`, `d`] and the
151+
// `scopes` vector will contain: [2], which indicates that index
152+
// within `variables` where the innermost scope starts.
153+
let mut scopes = Vec::new();
154+
155+
let mut dfs = DFSIter::new(expr);
156+
while let Some(event) = dfs.next() {
157+
match event {
158+
DFSEvent::Enter(expr) => {
159+
match dfs.contexts().next() {
160+
Some(DFSContext::Body(Expr::ForIn(for_in))) => {
161+
scopes.push(variables.len());
162+
variables
163+
.extend(for_in.variables.iter().map(|v| v.name));
164+
}
165+
Some(DFSContext::Body(Expr::With(with))) => {
166+
scopes.push(variables.len());
167+
variables.extend(
168+
with.declarations
169+
.iter()
170+
.map(|d| d.identifier.name),
171+
);
172+
}
173+
_ => {}
174+
}
175+
if let Expr::Ident(ident) = expr {
176+
// If this is a known variable, ignore it.
177+
if variables.contains(&ident.name) {
178+
continue;
179+
}
180+
if dep_map.contains_key(ident.name) {
181+
// This is an identifier that matches a previously
182+
// seen rule.
183+
dep_map.entry(rule_name).and_modify(|v| {
184+
v.rules.insert(ident.name);
185+
});
186+
} else if yara_x::mods::module_names()
187+
.any(|module| module == ident.name)
188+
{
189+
// This is a known module or is not in the list of
190+
// variable identifier to be ignored.
191+
dep_map.entry(rule_name).and_modify(|v| {
192+
v.modules.insert(ident.name);
193+
});
194+
}
195+
}
196+
}
197+
DFSEvent::Leave(expr) => {
198+
// When leaving a `for` or `with` statement, remove all the
199+
// variables they defined.
200+
if matches!(expr, Expr::ForIn(_) | Expr::With(_)) {
201+
variables.drain(scopes.pop().unwrap()..);
202+
}
203+
// When leaving the operand of a FieldAccess expression we prune
204+
// the DFS tree, which prevents the siblings of this node from
205+
// being traversed. This implies that only the first operand of the
206+
// FieldAccess node is visited. The rest of the operands of a
207+
// field access expression can contain identifiers, but those
208+
// identifiers will correspond to some field in a structure, not
209+
// to a variable or module name.
210+
if matches!(
211+
dfs.contexts().next(),
212+
Some(DFSContext::Operand(Expr::FieldAccess(_)))
213+
) {
214+
dfs.prune();
215+
}
216+
}
217+
}
218+
}
219+
}

cli/src/commands/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ mod check;
22
mod compile;
33
mod completion;
44
mod debug;
5+
mod deps;
56
mod dump;
67
mod fix;
78
mod fmt;
@@ -12,6 +13,7 @@ pub use compile::*;
1213
pub use completion::*;
1314
#[cfg(feature = "debug-cmd")]
1415
pub use debug::*;
16+
pub use deps::*;
1517
pub use dump::*;
1618
pub use fix::*;
1719
pub use fmt::*;
@@ -70,6 +72,7 @@ pub fn cli() -> Command {
7072
commands::fmt(),
7173
commands::fix(),
7274
commands::completion(),
75+
commands::deps(),
7376
])
7477
}
7578

cli/src/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ fn main() -> anyhow::Result<()> {
9191
Some(("dump", args)) => commands::exec_dump(args),
9292
Some(("compile", args)) => commands::exec_compile(args, &config),
9393
Some(("completion", args)) => commands::exec_completion(args),
94+
Some(("deps", args)) => commands::exec_deps(args),
9495
_ => unreachable!(),
9596
};
9697

0 commit comments

Comments
 (0)