Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,14 @@ generate-proto-code = [
"dep:yara-x-proto"
]

# Enables the generation of documentation for module fields and functions. This
# requires the `protoc` feature because it relies on `protoc`'s ability to
# extract documentation comments from .proto files, something that the pure-Rust
# parser in the `protobuf_codegen` crate can't do.
#
# This feature is disabled by default.
generate-module-docs = ["protoc"]

# Uses the `inventory` crate (https://github.com/dtolnay/inventory) instead
# of `linkme` (https://github.com/dtolnay/linkme) for tracking WASM exports.
#
Expand Down
241 changes: 221 additions & 20 deletions lib/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,17 @@
use protobuf::descriptor::FileDescriptorProto;

#[cfg(feature = "generate-proto-code")]
fn generate_module_files(proto_files: Vec<FileDescriptorProto>) {
/// Description of a YARA module discovered in a `.proto` file that carries
/// `yara.module_options`.
///
/// One instance is built per such proto file by `generate_module_files`, and
/// the resulting list is later consumed by `generate_module_docs`.
#[derive(Clone, Ord, PartialOrd, Eq, PartialEq)]
struct Module {
/// Module name, taken from the `name` module option.
name: String,
/// File name of the `.proto` file with its extension removed.
proto_mod: String,
/// Value of the optional `rust_module` module option.
rust_mod: Option<String>,
/// Value of the optional `cargo_feature` module option.
cargo_feature: Option<String>,
/// Value of the `root_message` module option. `generate_module_docs` uses
/// it as the starting point when computing which messages are reachable.
root_msg: String,
}

#[cfg(feature = "generate-proto-code")]
fn generate_module_files(proto_files: &[FileDescriptorProto]) -> Vec<Module> {
use std::fs::File;
use std::io::Write;
use std::path::PathBuf;
Expand All @@ -12,6 +22,7 @@ fn generate_module_files(proto_files: Vec<FileDescriptorProto>) {
println!("cargo:rerun-if-changed=src/modules/modules.rs");

let mut modules = Vec::new();

// Look for .proto files that describe a YARA module. A proto that
// describes a YARA module has yara.module_options, like...
//
Expand All @@ -25,7 +36,7 @@ fn generate_module_files(proto_files: Vec<FileDescriptorProto>) {
if let Some(module_options) =
yara_module_options.get(&proto_file.options)
{
let proto_path = PathBuf::from(proto_file.name.unwrap());
let proto_path = PathBuf::from(proto_file.name.as_ref().unwrap());
let proto_name = proto_path
.with_extension("")
.file_name()
Expand All @@ -34,13 +45,15 @@ fn generate_module_files(proto_files: Vec<FileDescriptorProto>) {
.unwrap()
.to_string();

modules.push((
module_options.name.unwrap(),
proto_name,
module_options.rust_module,
module_options.cargo_feature,
module_options.root_message.unwrap(),
));
let root_msg = module_options.root_message.unwrap();

modules.push(Module {
name: module_options.name.unwrap(),
proto_mod: proto_name,
rust_mod: module_options.rust_module,
cargo_feature: module_options.cargo_feature,
root_msg,
});
}
}

Expand All @@ -64,7 +77,7 @@ fn generate_module_files(proto_files: Vec<FileDescriptorProto>) {
println!(
"cargo:warning=to disable the warning set the environment variable YRX_REGENERATE_MODULES_RS=false"
);
return;
return Vec::new();
}
};

Expand Down Expand Up @@ -95,14 +108,14 @@ fn generate_module_files(proto_files: Vec<FileDescriptorProto>) {
// no matter the platform. If modules are not sorted, the order will
// vary from one platform to the other, in the same way that HashMap
// doesn't produce consistent key order.
modules.sort();
modules.sort_by(|a, b| a.name.cmp(&b.name));

for m in modules {
let name = m.0;
let proto_mod = m.1;
let rust_mod = m.2;
let cargo_feature = m.3;
let root_message = m.4;
for m in &modules {
let name = &m.name;
let proto_mod = &m.proto_mod;
let rust_mod = &m.rust_mod;
let cargo_feature = &m.cargo_feature;
let root_message = &m.root_msg;

// If the YARA module has an associated Rust module, this module must
// have a function named "main". If the YARA module doesn't have an
Expand Down Expand Up @@ -145,6 +158,187 @@ add_module!(modules, "{name}", {proto_mod}, "{root_message}", {rust_mod_name}, {
}

write!(add_modules_rs, "\n}}").unwrap();

modules
}

/// Generates `src/modules/field_docs.rs`, a table with the documentation
/// comments attached to the fields of every message used by a YARA module.
///
/// The function works in three steps:
///
/// 1. Builds a map from each message's fully qualified name to the names of
///    the messages referenced by its fields.
/// 2. Starting at the root message of each module in `modules`, computes the
///    set of messages that are transitively reachable.
/// 3. Walks the source code info of each proto file and collects the leading
///    comment of every field that belongs to a reachable message.
///
/// The output is a `FIELD_DOCS` constant containing
/// `(message name, field number, comment)` tuples sorted in ascending order,
/// which keeps the generated file stable between builds.
///
/// Proto files without source code info contribute no entries.
///
/// # Panics
///
/// Panics if `src/modules/field_docs.rs` can't be created or written.
#[cfg(feature = "generate-module-docs")]
fn generate_module_docs(
    proto_files: &[FileDescriptorProto],
    modules: &[Module],
) {
    use std::collections::{HashMap, HashSet};
    use std::fs::File;
    use std::io::Write;

    /// Returns the fully qualified name of a top-level message.
    fn qualified_name(package: &str, msg_name: &str) -> String {
        if package.is_empty() {
            msg_name.to_string()
        } else {
            format!("{}.{}", package, msg_name)
        }
    }

    /// Records, for `msg` and all its nested messages, the names of the
    /// messages referenced by their fields.
    fn collect_deps(
        msg: &protobuf::descriptor::DescriptorProto,
        full_name: String,
        deps: &mut HashMap<String, Vec<String>>,
    ) {
        let mut referenced = Vec::new();

        for field in &msg.field {
            if field.type_()
                != protobuf::descriptor::field_descriptor_proto::Type::TYPE_MESSAGE
            {
                continue;
            }
            if let Some(type_name) = &field.type_name {
                // Type names may start with a dot, which is removed so that
                // they match the names built by `qualified_name`.
                let dep_name =
                    type_name.strip_prefix('.').unwrap_or(type_name);
                referenced.push(dep_name.to_string());
            }
        }

        for nested in &msg.nested_type {
            let nested_name = format!(
                "{}.{}",
                full_name,
                nested.name.as_deref().unwrap_or("")
            );
            collect_deps(nested, nested_name, deps);
        }

        deps.insert(full_name, referenced);
    }

    /// Maps the source-info path of `msg`, and of all its nested messages,
    /// to the message's full name and the numbers of its fields (in
    /// declaration order).
    fn traverse_msg(
        msg: &protobuf::descriptor::DescriptorProto,
        path: Vec<i32>,
        full_name: String,
        map: &mut HashMap<Vec<i32>, (String, Vec<u64>)>,
    ) {
        for (k, nested) in msg.nested_type.iter().enumerate() {
            let mut nested_path = path.clone();
            nested_path.push(3); // 3 is nested_type in DescriptorProto
            nested_path.push(k as i32);
            let nested_name = format!(
                "{}.{}",
                full_name,
                nested.name.as_deref().unwrap_or("")
            );
            traverse_msg(nested, nested_path, nested_name, map);
        }

        let field_numbers = msg
            .field
            .iter()
            .map(|field| field.number.unwrap_or(0) as u64)
            .collect();

        map.insert(path, (full_name, field_numbers));
    }

    /// Escapes `text` so that it can be embedded in a regular (non-raw) Rust
    /// string literal.
    ///
    /// Backslashes must be escaped in addition to double quotes, otherwise a
    /// comment containing `\` produces a different string or a file that
    /// doesn't compile. Carriage returns are escaped because a bare CR is
    /// not accepted inside a string literal. Line feeds are left untouched,
    /// so multi-line comments remain multi-line in the generated file.
    fn escape_str_literal(text: &str) -> String {
        let mut escaped = String::with_capacity(text.len());
        for c in text.chars() {
            match c {
                '\\' => escaped.push_str("\\\\"),
                '"' => escaped.push_str("\\\""),
                '\r' => escaped.push_str("\\r"),
                _ => escaped.push(c),
            }
        }
        escaped
    }

    // 1. Collect message dependencies
    let mut dependencies = HashMap::new();

    for proto_file in proto_files {
        let package = proto_file.package.as_deref().unwrap_or("");
        for msg in &proto_file.message_type {
            let full_name =
                qualified_name(package, msg.name.as_deref().unwrap_or(""));
            collect_deps(msg, full_name, &mut dependencies);
        }
    }

    // 2. Compute transitive closure
    let mut reachable = HashSet::new();
    let mut queue: Vec<String> = Vec::new();

    for m in modules {
        if reachable.insert(m.root_msg.clone()) {
            queue.push(m.root_msg.clone());
        }
    }

    while let Some(node) = queue.pop() {
        if let Some(deps) = dependencies.get(&node) {
            for dep in deps {
                if reachable.insert(dep.clone()) {
                    queue.push(dep.clone());
                }
            }
        }
    }

    // 3. Generate docs only for reachable messages
    let mut docs = Vec::new();

    for proto_file in proto_files {
        // Without source info there are no comments to extract, so there is
        // no point in building the path map for this file.
        let source_code_info = match proto_file.source_code_info.as_ref() {
            Some(info) => info,
            None => continue,
        };

        let package = proto_file.package.as_deref().unwrap_or("");
        let mut msg_map = HashMap::new();

        for (i, msg) in proto_file.message_type.iter().enumerate() {
            let full_name =
                qualified_name(package, msg.name.as_deref().unwrap_or(""));
            // 4 is message_type in FileDescriptorProto
            traverse_msg(msg, vec![4, i as i32], full_name, &mut msg_map);
        }

        for location in &source_code_info.location {
            let path = &location.path;

            // A path that identifies a field has the form
            // [<message path>.., 2, <field index>], where 2 is `field` in
            // DescriptorProto.
            if path.len() < 2 || path[path.len() - 2] != 2 {
                continue;
            }

            let field_idx = path[path.len() - 1] as usize;
            let msg_path = &path[..path.len() - 2];

            let (msg_name, field_numbers) = match msg_map.get(msg_path) {
                Some(entry) => entry,
                None => continue,
            };

            if !reachable.contains(msg_name) {
                continue;
            }

            if let (Some(field_number), Some(comments)) =
                (field_numbers.get(field_idx), &location.leading_comments)
            {
                docs.push((
                    msg_name.clone(),
                    *field_number,
                    comments.trim().to_string(),
                ));
            }
        }
    }

    // Sort the entries so that the generated file doesn't depend on the
    // order in which proto files and locations were visited.
    docs.sort();

    let mut field_docs_rs = File::create("src/modules/field_docs.rs").unwrap();

    writeln!(
        field_docs_rs,
        "// File generated automatically by build.rs. Do not edit.\n"
    )
    .unwrap();

    writeln!(field_docs_rs, "pub const FIELD_DOCS: &[(&str, u64, &str)] = &[")
        .unwrap();

    for (msg_name, field_number, comments) in docs {
        writeln!(
            field_docs_rs,
            r#" ("{}", {}, "{}"),"#,
            msg_name,
            field_number,
            escape_str_literal(&comments)
        )
        .unwrap();
    }

    writeln!(field_docs_rs, "];").unwrap();
}

#[cfg(feature = "generate-proto-code")]
Expand All @@ -162,6 +356,9 @@ fn generate_proto_code() {
if cfg!(feature = "protoc") {
proto_compiler.protoc();
proto_parser.protoc();

#[cfg(feature = "generate-module-docs")]
proto_parser.protoc_extra_args(["--include_source_info"]);
} else {
proto_compiler.pure();
proto_parser.pure();
Expand Down Expand Up @@ -261,9 +458,13 @@ fn generate_proto_code() {
}

if regenerate {
generate_module_files(
proto_parser.file_descriptor_set().unwrap().file,
);
let proto_files = proto_parser.file_descriptor_set().unwrap().file;

#[allow(unused_variables)]
let modules = generate_module_files(&proto_files);

#[cfg(feature = "generate-module-docs")]
generate_module_docs(&proto_files, &modules);

let out_dir = env::var("OUT_DIR").unwrap();
let src_dir = PathBuf::from("src/modules/protos/generated");
Expand Down
60 changes: 60 additions & 0 deletions lib/src/modules/field_docs.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// File generated automatically by build.rs. Do not edit.

pub const FIELD_DOCS: &[(&str, u64, &str)] = &[
("dex.DexHeader", 2, "DEX version (35, 36, 37, ...)"),
("lnk.Lnk", 1, "True if the file is a LNK file."),
("lnk.Lnk", 2, "A description of the shortcut that is displayed to end users to identify
the purpose of the link."),
("lnk.Lnk", 3, "Time when the LNK file was created."),
("lnk.Lnk", 4, "Time when the LNK file was last accessed."),
("lnk.Lnk", 5, "Time when the LNK files was last modified."),
("lnk.Lnk", 6, "Size of the target file in bytes. The target file is the file that this
link references to. If the link target file is larger than 0xFFFFFFFF,
this value specifies the least significant 32 bits of the link target file
size."),
("lnk.Lnk", 7, "Attributes of the link target file."),
("lnk.Lnk", 8, "Location where the icon associated to the link is found. This is usually
an EXE or DLL file that contains the icon among its resources. The
specific icon to be used is indicated by the `icon_index` field."),
("lnk.Lnk", 9, "Index of the icon that is associated to the link, within an icon location."),
("lnk.Lnk", 10, "Expected window state of an application launched by this link."),
("lnk.Lnk", 11, "Type of drive the link is stored on."),
("lnk.Lnk", 12, "Drive serial number of the volume the link target is stored on."),
("lnk.Lnk", 13, "Volume label of the drive the link target is stored on."),
("lnk.Lnk", 14, "String used to construct the full path to the link target by appending the
common_path_suffix field."),
("lnk.Lnk", 15, "String used to construct the full path to the link target by being appended
to the local_base_path field."),
("lnk.Lnk", 16, "Location of the link target relative to the LNK file."),
("lnk.Lnk", 17, "Path of the working directory to be used when activating the link target."),
("lnk.Lnk", 18, "Command-line arguments that are specified when activating the link target."),
("lnk.Lnk", 19, "Size in bytes of any extra data appended to the LNK file."),
("lnk.Lnk", 20, "Offset within the LNK file where the overlay starts."),
("lnk.Lnk", 21, "Distributed link tracker information."),
("macho.Macho", 1, "Set Mach-O header and basic fields"),
("macho.Macho", 29, "Add fields for Mach-O fat binary header"),
("macho.Macho", 32, "Nested Mach-O files"),
("pe.PE", 16, "Entry point as a file offset."),
("pe.PE", 17, "Entry point as it appears in the PE header (RVA)."),
("pe.Section", 1, "The section's name as listed in the section table. The data type is `bytes`
instead of `string` so that it can accommodate invalid UTF-8 content. The
length is 8 bytes at most."),
("pe.Section", 2, "For section names longer than 8 bytes, the name in the section table (and
in the `name` field) contains a forward slash (/) followed by an ASCII
representation of a decimal number that is an offset into the string table.
(examples: \"/4\", \"/123\") This mechanism is described in the MSDN and used
by GNU compilers.

When this scenario occurs, the `full_name` field holds the actual section
name. In all other cases, it simply duplicates the content of the `name`
field.

See: https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_section_header#members"),
("pe.Version", 1, "Major version."),
("pe.Version", 2, "Minor version."),
("test_proto2.TestProto2", 350, "This field will be visible in YARA as `bool_yara` instead of `bool_proto`."),
("test_proto2.TestProto2", 351, "This field won't be visible to YARA."),
("test_proto2.TestProto2", 500, "This field is accessible only if the features \"foo\" (or \"FOO\") and \"bar\"
are enabled while compiling the YARA rules."),
("test_proto2.TestProto2", 502, "The metadata received by the module is copied into this field."),
];
Loading
Loading