Skip to content

Commit 6ab30fa

Browse files
authored
fix(cli): preserve duplicate metadata keys in JSON output (VirusTotal#550)
When a YARA rule contains multiple metadata entries with the same key (e.g., several `hash` entries), the JSON output now groups their values into an array instead of silently discarding all but one of them. This matches the JSON output format of the original YARA.
1 parent 0d1763e commit 6ab30fa

3 files changed

Lines changed: 96 additions & 8 deletions

File tree

cli/src/commands/scan.rs

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1088,15 +1088,30 @@ mod output_handler {
10881088
})
10891089
.map(|rule| {
10901090
let meta = self.output_options.include_meta.then(|| {
1091-
rule.metadata()
1092-
.map(|(meta_key, meta_val)| {
1093-
let meta_key = meta_key.to_owned();
1094-
let meta_val = serde_json::to_value(meta_val)
1095-
.expect(
1096-
"Derived Serialize impl should never fail",
1097-
);
1091+
// Group metadata by key to handle duplicate keys.
1092+
let mut grouped: HashMap<
1093+
String,
1094+
Vec<serde_json::Value>,
1095+
> = HashMap::new();
1096+
1097+
for (meta_key, meta_val) in rule.metadata() {
1098+
let key = meta_key.to_owned();
1099+
let val = serde_json::to_value(meta_val).expect(
1100+
"Derived Serialize impl should never fail",
1101+
);
1102+
grouped.entry(key).or_default().push(val);
1103+
}
10981104

1099-
(meta_key, meta_val)
1105+
// Single values stay as-is, multiple values become arrays.
1106+
grouped
1107+
.into_iter()
1108+
.map(|(k, mut v)| {
1109+
let val = if v.len() == 1 {
1110+
v.pop().unwrap()
1111+
} else {
1112+
serde_json::Value::Array(v)
1113+
};
1114+
(k, val)
11001115
})
11011116
.collect::<HashMap<_, _>>()
11021117
});

cli/src/tests/scan.rs

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use assert_cmd::{cargo_bin, Command};
22
use assert_fs::prelude::*;
33
use assert_fs::TempDir;
44
use predicates::prelude::*;
5+
use serde_json;
56

67
#[test]
78
fn always_true() {
@@ -356,3 +357,65 @@ fn issue_280() {
356357
.assert()
357358
.success();
358359
}
360+
361+
/// Regression test: metadata keys that appear more than once in a rule must
/// be emitted as a JSON array, while keys that appear once keep their scalar
/// value.
///
/// Runs `yr scan --output-format=json --print-meta` with the
/// `duplicate_meta.yar` rules against `dummy.file` and inspects the `meta`
/// object of the first entry in `matches`.
#[test]
362+
fn json_output_duplicate_meta_keys() {
363+
// Run the CLI and capture its stdout; `.success()` fails the test if the process exits with an error.
364+
let output = Command::new(cargo_bin!("yr"))
365+
.arg("scan")
366+
.arg("--output-format=json")
367+
.arg("--print-meta")
368+
.arg("src/tests/testdata/duplicate_meta.yar")
369+
.arg("src/tests/testdata/dummy.file")
370+
.assert()
371+
.success()
372+
.get_output()
373+
.stdout
374+
.clone();
375+
376+
// The whole of stdout must parse as one JSON document.
let json: serde_json::Value =
377+
serde_json::from_slice(&output).expect("valid JSON output");
378+
379+
// Metadata of the first match. Indexing a `serde_json::Value` with a
// missing key or index yields `Null` instead of panicking, so a wrong
// document shape surfaces in the assertions below rather than here.
380+
let meta = &json["matches"][0]["meta"];
381+
382+
// Keys that occur once are expected as plain strings, not one-element arrays.
383+
assert_eq!(meta["author"], "Test Author");
384+
assert_eq!(meta["description"], "Rule with duplicate metadata keys");
385+
386+
// `hash` is expected to collect three values into one array.
// Membership is checked rather than position, so this test does not
// pin the order of the values inside the array.
387+
let hash = &meta["hash"];
388+
assert!(hash.is_array(), "hash should be an array");
389+
let hash_array = hash.as_array().unwrap();
390+
assert_eq!(hash_array.len(), 3);
391+
assert!(hash_array.contains(&serde_json::json!("aaa111")));
392+
assert!(hash_array.contains(&serde_json::json!("bbb222")));
393+
assert!(hash_array.contains(&serde_json::json!("ccc333")));
394+
}
395+
396+
/// Companion to the duplicate-key test: metadata values must NOT be wrapped
/// in arrays when the JSON output is produced for `foo.yar`.
///
/// Runs `yr scan --output-format=json --print-meta` with `foo.yar` against
/// `dummy.file` and checks the JSON type of four metadata values.
#[test]
397+
fn json_output_single_meta_not_array() {
398+
// Run the CLI and capture its stdout; `.success()` fails the test if the process exits with an error.
399+
let output = Command::new(cargo_bin!("yr"))
400+
.arg("scan")
401+
.arg("--output-format=json")
402+
.arg("--print-meta")
403+
.arg("src/tests/testdata/foo.yar")
404+
.arg("src/tests/testdata/dummy.file")
405+
.assert()
406+
.success()
407+
.get_output()
408+
.stdout
409+
.clone();
410+
411+
// The whole of stdout must parse as one JSON document.
let json: serde_json::Value =
412+
serde_json::from_slice(&output).expect("valid JSON output");
413+
414+
// Metadata of the first match; a missing key or index yields `Null`, which fails the type checks below.
let meta = &json["matches"][0]["meta"];
415+
416+
// Each value must keep its scalar JSON type (an array would fail every check).
// NOTE(review): assumes foo.yar defines the keys `string`, `bool`, `int`
// and `float` exactly once each — the fixture is not visible here; confirm.
417+
assert!(meta["string"].is_string());
418+
assert!(meta["bool"].is_boolean());
419+
assert!(meta["int"].is_i64());
420+
assert!(meta["float"].is_f64());
421+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// Test fixture: a rule whose `meta` section repeats the `hash` key three
// times next to two keys (`author`, `description`) that occur once.
// The condition is `true`, so the rule matches whatever is scanned.
// NOTE(review): presumably this is src/tests/testdata/duplicate_meta.yar as
// used by the json_output_duplicate_meta_keys test — the file name is not
// shown in this view; confirm.
rule duplicate_meta {
2+
meta:
3+
author = "Test Author"
4+
hash = "aaa111"
5+
hash = "bbb222"
6+
hash = "ccc333"
7+
description = "Rule with duplicate metadata keys"
8+
condition:
9+
true
10+
}

0 commit comments

Comments
 (0)