22use protobuf:: descriptor:: FileDescriptorProto ;
33
44#[ cfg( feature = "generate-proto-code" ) ]
5- fn generate_module_files ( proto_files : Vec < FileDescriptorProto > ) {
/// Description of a YARA module, built from the `yara.module_options`
/// found in the `.proto` file that declares the module.
#[derive(Clone, Debug, Ord, PartialOrd, Eq, PartialEq)]
struct Module {
    /// Module name, taken from the `name` module option.
    name: String,
    /// File name of the `.proto` file with its extension removed.
    proto_mod: String,
    /// Value of the `rust_module` module option, if any.
    rust_mod: Option<String>,
    /// Value of the `cargo_feature` module option, if any.
    cargo_feature: Option<String>,
    /// Value of the `root_message` module option. Used as the starting
    /// point when computing which messages need field documentation.
    root_msg: String,
}
13+
14+ #[ cfg( feature = "generate-proto-code" ) ]
15+ fn generate_module_files ( proto_files : & [ FileDescriptorProto ] ) -> Vec < Module > {
616 use std:: fs:: File ;
717 use std:: io:: Write ;
818 use std:: path:: PathBuf ;
@@ -12,6 +22,7 @@ fn generate_module_files(proto_files: Vec<FileDescriptorProto>) {
1222 println ! ( "cargo:rerun-if-changed=src/modules/modules.rs" ) ;
1323
1424 let mut modules = Vec :: new ( ) ;
25+
1526 // Look for .proto files that describe a YARA module. A proto that
1627 // describes a YARA module has yara.module_options, like...
1728 //
@@ -25,7 +36,7 @@ fn generate_module_files(proto_files: Vec<FileDescriptorProto>) {
2536 if let Some ( module_options) =
2637 yara_module_options. get ( & proto_file. options )
2738 {
28- let proto_path = PathBuf :: from ( proto_file. name . unwrap ( ) ) ;
39+ let proto_path = PathBuf :: from ( proto_file. name . as_ref ( ) . unwrap ( ) ) ;
2940 let proto_name = proto_path
3041 . with_extension ( "" )
3142 . file_name ( )
@@ -34,13 +45,15 @@ fn generate_module_files(proto_files: Vec<FileDescriptorProto>) {
3445 . unwrap ( )
3546 . to_string ( ) ;
3647
37- modules. push ( (
38- module_options. name . unwrap ( ) ,
39- proto_name,
40- module_options. rust_module ,
41- module_options. cargo_feature ,
42- module_options. root_message . unwrap ( ) ,
43- ) ) ;
48+ let root_msg = module_options. root_message . unwrap ( ) ;
49+
50+ modules. push ( Module {
51+ name : module_options. name . unwrap ( ) ,
52+ proto_mod : proto_name,
53+ rust_mod : module_options. rust_module ,
54+ cargo_feature : module_options. cargo_feature ,
55+ root_msg,
56+ } ) ;
4457 }
4558 }
4659
@@ -64,7 +77,7 @@ fn generate_module_files(proto_files: Vec<FileDescriptorProto>) {
6477 println ! (
6578 "cargo:warning=to disable the warning set the environment variable YRX_REGENERATE_MODULES_RS=false"
6679 ) ;
67- return ;
80+ return Vec :: new ( ) ;
6881 }
6982 } ;
7083
@@ -95,14 +108,14 @@ fn generate_module_files(proto_files: Vec<FileDescriptorProto>) {
95108 // no matter the platform. If modules are not sorted, the order will
96109 // vary from one platform to the other, in the same way that HashMap
97110 // doesn't produce consistent key order.
98- modules. sort ( ) ;
111+ modules. sort_by ( |a , b| a . name . cmp ( & b . name ) ) ;
99112
100- for m in modules {
101- let name = m . 0 ;
102- let proto_mod = m . 1 ;
103- let rust_mod = m . 2 ;
104- let cargo_feature = m . 3 ;
105- let root_message = m . 4 ;
113+ for m in & modules {
114+ let name = & m . name ;
115+ let proto_mod = & m . proto_mod ;
116+ let rust_mod = & m . rust_mod ;
117+ let cargo_feature = & m . cargo_feature ;
118+ let root_message = & m . root_msg ;
106119
107120 // If the YARA module has an associated Rust module, this module must
108121 // have a function named "main". If the YARA module doesn't have an
@@ -145,6 +158,187 @@ add_module!(modules, "{name}", {proto_mod}, "{root_message}", {rust_mod_name}, {
145158 }
146159
147160 write ! ( add_modules_rs, "\n }}" ) . unwrap ( ) ;
161+
162+ modules
163+ }
164+
/// Generates `src/modules/field_docs.rs`, a table with the documentation
/// comments attached to the fields of every protobuf message that is
/// reachable from the root message of some YARA module.
///
/// The work is done in three steps:
///
/// 1. For every message (top-level or nested) in `proto_files`, record the
///    fully-qualified names of the message types referenced by its fields.
/// 2. Starting at the `root_msg` of each entry in `modules`, follow those
///    references to compute the set of reachable messages.
/// 3. For every file that carries source code info, collect the leading
///    comment of each field that belongs to a reachable message.
///
/// The result is written as
/// `pub const FIELD_DOCS: &[(&str, u64, &str)]`, where each entry is
/// `(message full name, field number, comment)`, sorted so that the output
/// is deterministic.
///
/// # Panics
///
/// Panics if the output file cannot be created or written.
#[cfg(feature = "generate-module-docs")]
fn generate_module_docs(
    proto_files: &[FileDescriptorProto],
    modules: &[Module],
) {
    use std::collections::{HashMap, HashSet};
    use std::fs::File;
    use std::io::Write;

    // 1. Collect message dependencies
    let mut dependencies = HashMap::new();

    for proto_file in proto_files {
        let package = proto_file.package.as_deref().unwrap_or("");

        // Records in `deps` the message types referenced by the fields of
        // `msg`, then does the same for all its nested messages.
        fn collect_deps(
            msg: &protobuf::descriptor::DescriptorProto,
            full_name: String,
            deps: &mut HashMap<String, Vec<String>>,
        ) {
            let mut referenced = Vec::new();
            for field in &msg.field {
                if field.type_()
                    == protobuf::descriptor::field_descriptor_proto::Type::TYPE_MESSAGE
                {
                    if let Some(type_name) = &field.type_name {
                        // Type names may start with a dot; message names
                        // built here never do, so strip it before storing.
                        let dep_name = type_name
                            .strip_prefix('.')
                            .unwrap_or(type_name)
                            .to_string();
                        referenced.push(dep_name);
                    }
                }
            }

            for nested in &msg.nested_type {
                let nested_name = format!(
                    "{}.{}",
                    full_name,
                    nested.name.as_deref().unwrap_or("")
                );
                collect_deps(nested, nested_name, deps);
            }

            deps.insert(full_name, referenced);
        }

        for msg in &proto_file.message_type {
            let msg_name = msg.name.as_deref().unwrap_or("");
            let full_name = if package.is_empty() {
                msg_name.to_string()
            } else {
                format!("{}.{}", package, msg_name)
            };
            collect_deps(msg, full_name, &mut dependencies);
        }
    }

    // 2. Compute transitive closure
    let mut reachable = HashSet::new();
    let mut queue: Vec<String> = Vec::new();

    for m in modules {
        let root = &m.root_msg;
        if reachable.insert(root.clone()) {
            queue.push(root.clone());
        }
    }

    // `HashSet::insert` returns `false` for names already seen, so each
    // message is queued at most once and cycles terminate.
    while let Some(node) = queue.pop() {
        if let Some(deps) = dependencies.get(&node) {
            for dep in deps {
                if reachable.insert(dep.clone()) {
                    queue.push(dep.clone());
                }
            }
        }
    }

    // 3. Generate docs only for reachable messages
    let mut docs = Vec::new();

    for proto_file in proto_files {
        let package = proto_file.package.as_deref().unwrap_or("");
        let mut msg_map = HashMap::new();

        // Recursively traverse messages to build a map of paths to message
        // names and field numbers.
        fn traverse_msg(
            msg: &protobuf::descriptor::DescriptorProto,
            path: Vec<i32>,
            full_name: String,
            map: &mut HashMap<Vec<i32>, (String, Vec<u64>)>,
        ) {
            let mut field_numbers = Vec::new();
            for field in &msg.field {
                field_numbers.push(field.number.unwrap_or(0) as u64);
            }
            map.insert(path.clone(), (full_name.clone(), field_numbers));

            for (k, nested) in msg.nested_type.iter().enumerate() {
                let mut nested_path = path.clone();
                nested_path.push(3); // 3 is nested_type in DescriptorProto
                nested_path.push(k as i32);
                let nested_name = format!(
                    "{}.{}",
                    full_name,
                    nested.name.as_deref().unwrap_or("")
                );
                traverse_msg(nested, nested_path, nested_name, map);
            }
        }

        for (i, msg) in proto_file.message_type.iter().enumerate() {
            let msg_name = msg.name.as_deref().unwrap_or("");
            let full_name = if package.is_empty() {
                msg_name.to_string()
            } else {
                format!("{}.{}", package, msg_name)
            };
            // 4 is message_type in FileDescriptorProto
            traverse_msg(msg, vec![4, i as i32], full_name, &mut msg_map);
        }

        // Files without source code info carry no comments; skip them.
        let source_code_info_ref = proto_file.source_code_info.as_ref();
        let source_code_info = match source_code_info_ref {
            Some(info) => info,
            None => continue,
        };

        for location in &source_code_info.location {
            let path = &location.path;
            // A field location is the path of its message followed by
            // `[2, index]` (2 is `field` in DescriptorProto). Paths that
            // merely happen to end that way are discarded by the `msg_map`
            // lookup below, which only contains message paths.
            if path.len() >= 2 && path[path.len() - 2] == 2 {
                let field_idx = path[path.len() - 1] as usize;
                let msg_path = &path[..path.len() - 2];

                if let Some((msg_name, field_numbers)) = msg_map.get(msg_path)
                {
                    if reachable.contains(msg_name)
                        && field_idx < field_numbers.len()
                    {
                        let field_number = field_numbers[field_idx];
                        if let Some(comments) = &location.leading_comments {
                            docs.push((
                                msg_name.clone(),
                                field_number,
                                comments.trim().to_string(),
                            ));
                        }
                    }
                }
            }
        }
    }

    docs.sort();

    let mut field_docs_rs = File::create("src/modules/field_docs.rs").unwrap();

    writeln!(
        field_docs_rs,
        "// File generated automatically by build.rs. Do not edit.\n"
    )
    .unwrap();

    writeln!(field_docs_rs, "pub const FIELD_DOCS: &[(&str, u64, &str)] = &[")
        .unwrap();

    for (msg_name, field_number, comments) in docs {
        // Escaping only the double quotes is not enough: a backslash in the
        // comment would be interpreted as the start of an escape sequence in
        // the generated literal, and a bare carriage return is rejected by
        // the compiler. `escape_default` produces text that is a valid Rust
        // string literal and decodes back to exactly the original comment.
        writeln!(
            field_docs_rs,
            r#"    ("{}", {}, "{}"),"#,
            msg_name,
            field_number,
            comments.escape_default()
        )
        .unwrap();
    }

    writeln!(field_docs_rs, "];").unwrap();
}
149343
150344#[ cfg( feature = "generate-proto-code" ) ]
@@ -162,6 +356,9 @@ fn generate_proto_code() {
162356 if cfg ! ( feature = "protoc" ) {
163357 proto_compiler. protoc ( ) ;
164358 proto_parser. protoc ( ) ;
359+
360+ #[ cfg( feature = "generate-module-docs" ) ]
361+ proto_parser. protoc_extra_args ( [ "--include_source_info" ] ) ;
165362 } else {
166363 proto_compiler. pure ( ) ;
167364 proto_parser. pure ( ) ;
@@ -261,9 +458,13 @@ fn generate_proto_code() {
261458 }
262459
263460 if regenerate {
264- generate_module_files (
265- proto_parser. file_descriptor_set ( ) . unwrap ( ) . file ,
266- ) ;
461+ let proto_files = proto_parser. file_descriptor_set ( ) . unwrap ( ) . file ;
462+
463+ #[ allow( unused_variables) ]
464+ let modules = generate_module_files ( & proto_files) ;
465+
466+ #[ cfg( feature = "generate-module-docs" ) ]
467+ generate_module_docs ( & proto_files, & modules) ;
267468
268469 let out_dir = env:: var ( "OUT_DIR" ) . unwrap ( ) ;
269470 let src_dir = PathBuf :: from ( "src/modules/protos/generated" ) ;
0 commit comments