@@ -376,7 +376,7 @@ def analyticsToDf(src):
376376 analytic_rows = []
377377 logsource_rows = []
378378 analytic_to_ds_rows = []
379- failed_analytics = set ()
379+ failed_by_data_component = {}
380380
381381 # analytics to detection strategies
382382 analytic_to_ds_map = {}
@@ -392,19 +392,29 @@ def analyticsToDf(src):
392392
393393 # Prints out errors where data components are not in the same domain as analytics
394394 for analytic in tqdm (analytics , desc = "parsing analytics" ):
395+ analytic_id = analytic .get ("id" )
395396 for logsrc in analytic .get ("x_mitre_log_source_references" , []):
396397 data_comp_id = logsrc .get ("x_mitre_data_component_ref" , "" )
397398 data_comp = src .get (data_comp_id )
398399 try :
399400 data_comp_attack_id = data_comp ["external_references" ][0 ]["external_id" ]
400401 except (KeyError , TypeError , IndexError , AttributeError ):
401- failed_analytics .add ((analytic ["id" ], data_comp_id ))
402-
403- if failed_analytics :
404- raise RuntimeError (
405- f"{ len (failed_analytics )} failures:\n " +
406- "\n " .join (f"analytic={ a } , data_component={ d } " for a , d in sorted (failed_analytics ))
407- )
402+ if data_comp_id not in failed_by_data_component :
403+ failed_by_data_component [data_comp_id ] = []
404+ failed_by_data_component [data_comp_id ].append (analytic_id )
405+
406+ if failed_by_data_component :
407+ lines = ["Failures grouped by data component:\n " ]
408+ for dc_id in sorted (failed_by_data_component ):
409+ analytic_ids = sorted (set (failed_by_data_component [dc_id ]))
410+ dc_obj = src .get (dc_id ) or {}
411+ dc_name = dc_obj .get ("name" , "" )
412+
413+ lines .append (f"data_component={ dc_id } " + (f" ({ dc_name } )" if dc_name else "" ))
414+ lines .extend ([f" - analytic={ a } " for a in analytic_ids ])
415+ lines .append ("" )
416+
417+ raise RuntimeError ("\n " .join (lines ))
408418
409419 for analytic in tqdm (analytics , desc = "parsing analytics" ):
410420 analytic_rows .append (parseBaseStix (analytic ))
0 commit comments