|
| 1 | +use std::collections::{BTreeMap, HashSet}; |
| 2 | +use std::fs; |
| 3 | +use std::path::PathBuf; |
| 4 | + |
| 5 | +use ::ascii_tree::write_tree; |
| 6 | +use ::ascii_tree::Tree; |
| 7 | +use ::ascii_tree::Tree::Node; |
| 8 | +use anyhow::{bail, Context}; |
| 9 | +use clap::{arg, value_parser, ArgAction, ArgMatches, Command}; |
| 10 | + |
| 11 | +use yara_x_parser::ast::dfs::{DFSContext, DFSEvent, DFSIter}; |
| 12 | +use yara_x_parser::ast::{Expr, AST}; |
| 13 | +use yara_x_parser::Parser; |
| 14 | + |
/// Dependencies collected for a single rule.
///
/// Both sets borrow their strings for the lifetime `'a`.
#[derive(Default, Debug)]
struct Deps<'a> {
    /// Identifiers of the rules this rule depends on.
    rules: HashSet<&'a str>,
    /// Names of the modules this rule uses.
    modules: HashSet<&'a str>,
}
| 20 | + |
| 21 | +pub fn deps() -> Command { |
| 22 | + super::command("deps") |
| 23 | + .about("Show rule dependencies and modules") |
| 24 | + // The `deps` command is not ready yet. |
| 25 | + .hide(true) |
| 26 | + .arg( |
| 27 | + arg!(<RULES_PATH>) |
| 28 | + .help("Path to YARA source file") |
| 29 | + .value_parser(value_parser!(PathBuf)), |
| 30 | + ) |
| 31 | + .arg( |
| 32 | + arg!(-r - -"rule") |
| 33 | + .required(false) |
| 34 | + .help("Rules to display dependency information for") |
| 35 | + .action(ArgAction::Append), |
| 36 | + ) |
| 37 | +} |
| 38 | + |
| 39 | +pub fn exec_deps(args: &ArgMatches) -> anyhow::Result<()> { |
| 40 | + let rules_path = args.get_one::<PathBuf>("RULES_PATH").unwrap(); |
| 41 | + let requested_rules = args.get_many::<String>("rule"); |
| 42 | + |
| 43 | + let requested_rules: Vec<_> = requested_rules |
| 44 | + .map_or(Vec::new(), |v| v.collect()) |
| 45 | + .into_iter() |
| 46 | + .map(|v| v.as_str()) |
| 47 | + .collect(); |
| 48 | + |
| 49 | + let src = fs::read(rules_path) |
| 50 | + .with_context(|| format!("can not read `{}`", rules_path.display()))?; |
| 51 | + |
| 52 | + let parser = Parser::new(src.as_slice()); |
| 53 | + let ast: AST = parser.into(); |
| 54 | + |
| 55 | + if !ast.errors().is_empty() { |
| 56 | + for err in ast.errors().iter() { |
| 57 | + println!("{err:?}"); |
| 58 | + } |
| 59 | + bail!("{} syntax error(s) found", ast.errors().len()); |
| 60 | + } |
| 61 | + |
| 62 | + // Map of rules to dependencies and modules they use. |
| 63 | + // |
| 64 | + // Given these rules: |
| 65 | + // |
| 66 | + // rule a { condition: pe.is_dll() } |
| 67 | + // rule b { condition: a or x } |
| 68 | + // |
| 69 | + // Deps would look like: |
| 70 | + // |
| 71 | + // { |
| 72 | + // "a": Deps { rules: {}, modules: {"pe"} }, |
| 73 | + // "b": Deps { rules: {"a"}, modules: {} } |
| 74 | + // } |
| 75 | + // |
| 76 | + // The unknown identifier "x" is silently ignored. |
| 77 | + let mut dep_map: BTreeMap<&str, Deps> = BTreeMap::new(); |
| 78 | + |
| 79 | + for rule in ast.rules() { |
| 80 | + if dep_map.insert(rule.identifier.name, Deps::default()).is_some() { |
| 81 | + bail!("Duplicate rule \"{}\" found", rule.identifier.name); |
| 82 | + }; |
| 83 | + find_dependencies(&rule.condition, rule.identifier.name, &mut dep_map); |
| 84 | + } |
| 85 | + |
| 86 | + let dep_tree = generate_dep_tree(&dep_map, &requested_rules); |
| 87 | + |
| 88 | + for dep in dep_tree.iter() { |
| 89 | + let mut output = String::new(); |
| 90 | + write_tree(&mut output, &dep)?; |
| 91 | + println!("{output}"); |
| 92 | + } |
| 93 | + |
| 94 | + Ok(()) |
| 95 | +} |
| 96 | + |
| 97 | +fn generate_dep_tree( |
| 98 | + dep_map: &BTreeMap<&str, Deps>, |
| 99 | + requested_rules: &Vec<&str>, |
| 100 | +) -> Vec<Tree> { |
| 101 | + let mut nodes: Vec<Tree> = Vec::new(); |
| 102 | + |
| 103 | + for (rule, deps) in dep_map.iter() { |
| 104 | + if requested_rules.is_empty() || requested_rules.contains(rule) { |
| 105 | + nodes.push(tree_for_rule(rule, &deps, &dep_map)); |
| 106 | + } |
| 107 | + } |
| 108 | + |
| 109 | + nodes |
| 110 | +} |
| 111 | + |
| 112 | +fn tree_for_rule( |
| 113 | + rule: &str, |
| 114 | + deps: &Deps, |
| 115 | + dep_map: &BTreeMap<&str, Deps>, |
| 116 | +) -> Tree { |
| 117 | + let mut nodes: Vec<Tree> = Vec::new(); |
| 118 | + |
| 119 | + for module in deps.modules.iter() { |
| 120 | + nodes.push(Node(format!("mod: {module}"), vec![])); |
| 121 | + } |
| 122 | + |
| 123 | + for dep in deps.rules.iter() { |
| 124 | + match dep_map.get(dep) { |
| 125 | + Some(new_deps) => { |
| 126 | + nodes.push(tree_for_rule(dep, new_deps, dep_map)); |
| 127 | + } |
| 128 | + None => { |
| 129 | + nodes.push(Node(dep.to_string(), vec![])); |
| 130 | + } |
| 131 | + } |
| 132 | + } |
| 133 | + |
| 134 | + Node(rule.to_string(), nodes) |
| 135 | +} |
| 136 | + |
| 137 | +fn find_dependencies<'a>( |
| 138 | + expr: &'a Expr<'a>, |
| 139 | + rule_name: &'a str, |
| 140 | + dep_map: &mut BTreeMap<&'a str, Deps<'a>>, |
| 141 | +) { |
| 142 | + // Contains the variables that are currently defined. This acts |
| 143 | + // as a stack where the variables defined by the innermost `for` |
| 144 | + // or `with` statements are at top of the array. |
| 145 | + let mut variables = Vec::new(); |
| 146 | + // The `scopes` array contains the indexes within the `variables` |
| 147 | + // array where a scope start. For instance, if we have two nested |
| 148 | + // `with` statements where the outermost one defines variables `a` |
| 149 | + // and `b`, while the innermost defines variables `c` and `d`, the |
| 150 | + // `variables` vector will contain [`a`, `b`, `c`, `d`] and the |
| 151 | + // `scopes` vector will contain: [2], which indicates that index |
| 152 | + // within `variables` where the innermost scope starts. |
| 153 | + let mut scopes = Vec::new(); |
| 154 | + |
| 155 | + let mut dfs = DFSIter::new(expr); |
| 156 | + while let Some(event) = dfs.next() { |
| 157 | + match event { |
| 158 | + DFSEvent::Enter(expr) => { |
| 159 | + match dfs.contexts().next() { |
| 160 | + Some(DFSContext::Body(Expr::ForIn(for_in))) => { |
| 161 | + scopes.push(variables.len()); |
| 162 | + variables |
| 163 | + .extend(for_in.variables.iter().map(|v| v.name)); |
| 164 | + } |
| 165 | + Some(DFSContext::Body(Expr::With(with))) => { |
| 166 | + scopes.push(variables.len()); |
| 167 | + variables.extend( |
| 168 | + with.declarations |
| 169 | + .iter() |
| 170 | + .map(|d| d.identifier.name), |
| 171 | + ); |
| 172 | + } |
| 173 | + _ => {} |
| 174 | + } |
| 175 | + if let Expr::Ident(ident) = expr { |
| 176 | + // If this is a known variable, ignore it. |
| 177 | + if variables.contains(&ident.name) { |
| 178 | + continue; |
| 179 | + } |
| 180 | + if dep_map.contains_key(ident.name) { |
| 181 | + // This is an identifier that matches a previously |
| 182 | + // seen rule. |
| 183 | + dep_map.entry(rule_name).and_modify(|v| { |
| 184 | + v.rules.insert(ident.name); |
| 185 | + }); |
| 186 | + } else if yara_x::mods::module_names() |
| 187 | + .any(|module| module == ident.name) |
| 188 | + { |
| 189 | + // This is a known module or is not in the list of |
| 190 | + // variable identifier to be ignored. |
| 191 | + dep_map.entry(rule_name).and_modify(|v| { |
| 192 | + v.modules.insert(ident.name); |
| 193 | + }); |
| 194 | + } |
| 195 | + } |
| 196 | + } |
| 197 | + DFSEvent::Leave(expr) => { |
| 198 | + // When leaving a `for` or `with` statement, remove all the |
| 199 | + // variables they defined. |
| 200 | + if matches!(expr, Expr::ForIn(_) | Expr::With(_)) { |
| 201 | + variables.drain(scopes.pop().unwrap()..); |
| 202 | + } |
| 203 | + // When leaving the operand of a FieldAccess expression we prune |
| 204 | + // the DFS tree, which prevents the siblings of this node from |
| 205 | + // being traversed. This implies that only the first operand of the |
| 206 | + // FieldAccess node is visited. The rest of the operands of a |
| 207 | + // field access expression can contain identifiers, but those |
| 208 | + // identifiers will correspond to some field in a structure, not |
| 209 | + // to a variable or module name. |
| 210 | + if matches!( |
| 211 | + dfs.contexts().next(), |
| 212 | + Some(DFSContext::Operand(Expr::FieldAccess(_))) |
| 213 | + ) { |
| 214 | + dfs.prune(); |
| 215 | + } |
| 216 | + } |
| 217 | + } |
| 218 | + } |
| 219 | +} |
0 commit comments