@@ -367,16 +367,67 @@ def analyticsToDf(src):
367367 analytics = src .query ([Filter ("type" , "=" , "x-mitre-analytic" )])
368368 analytics = remove_revoked_deprecated (analytics )
369369
370+ # Detection strategies (needed for analytics to detection strategies relationship)
371+ detection_strategies = src .query ([Filter ("type" , "=" , "x-mitre-detection-strategy" )])
372+ detection_strategies = remove_revoked_deprecated (detection_strategies )
373+
370374 dataframes = {}
371375 if analytics :
372376 analytic_rows = []
377+ logsource_rows = []
378+ analytic_to_ds_rows = []
379+
380+ # analytics to detection strategies
381+ analytic_to_ds_map = {}
382+ for ds in detection_strategies :
383+ for analytic_id in ds .get ("x_mitre_analytic_refs" , []):
384+ analytic_to_ds_map .setdefault (analytic_id , []).append (
385+ {
386+ "detection_strategy_attack_id" : ds ["external_references" ][0 ]["external_id" ],
387+ "detection_strategy_id" : ds ["id" ],
388+ "detection_strategy_name" : ds .get ("name" , "" ),
389+ }
390+ )
391+
373392 for analytic in tqdm (analytics , desc = "parsing analytics" ):
374393 analytic_rows .append (parseBaseStix (analytic ))
375394
395+ # log-source relationship table rows
396+ for logsrc in analytic .get ("x_mitre_log_source_references" , []):
397+ data_comp_id = logsrc .get ("x_mitre_data_component_ref" , "" )
398+ data_comp = src .get (data_comp_id )
399+ data_comp_name = data_comp .get ("name" , "" ) if data_comp else ""
400+ data_comp_attack_id = data_comp ["external_references" ][0 ]["external_id" ]
401+
402+ logsource_rows .append (
403+ {
404+ "analytic_id" : analytic ["id" ],
405+ "analytic_name" : analytic ["external_references" ][0 ]["external_id" ],
406+ "data_component_id" : data_comp_id ,
407+ "data_component_name" : data_comp_name ,
408+ "data_component_attack_id" : data_comp_attack_id ,
409+ "log_source_name" : logsrc .get ("name" , "" ),
410+ "channel" : logsrc .get ("channel" , "" ),
411+ "platforms" : ", " .join (sorted (analytic .get ("x_mitre_platforms" , []))),
412+ }
413+ )
414+
415+ # detection strategies relationship table rows
416+ for ds_info in analytic_to_ds_map .get (analytic ["id" ], []):
417+ analytic_to_ds_rows .append (
418+ {
419+ "analytic_id" : analytic ["id" ],
420+ "analytic_name" : analytic ["external_references" ][0 ]["external_id" ],
421+ "detection_strategy_id" : ds_info ["detection_strategy_id" ],
422+ "detection_strategy_attack_id" : ds_info ["detection_strategy_attack_id" ],
423+ "detection_strategy_name" : ds_info ["detection_strategy_name" ],
424+ "platforms" : ", " .join (sorted (analytic .get ("x_mitre_platforms" , []))),
425+ }
426+ )
427+
428+ dataframes ["analytics" ] = pd .DataFrame (analytic_rows ).sort_values ("name" )
429+
376430 citations = get_citations (analytics )
377- dataframes = {
378- "analytics" : pd .DataFrame (analytic_rows ).sort_values ("name" ),
379- }
380431 if not citations .empty :
381432 dataframes ["citations" ] = citations .sort_values ("reference" )
382433
@@ -398,20 +449,36 @@ def detectionstrategiesToDf(src):
398449 dataframes = {}
399450 if detection_strategies :
400451 detection_strategy_rows = []
452+ rel_rows = []
401453 for detection_strategy in tqdm (detection_strategies , desc = "parsing detection strategies" ):
402- detection_strategy_rows .append (parseBaseStix (detection_strategy ))
454+ row = parseBaseStix (detection_strategy )
455+ row ["analytic_refs" ] = "; " .join (detection_strategy .get ("x_mitre_analytic_refs" , []))
456+ detection_strategy_rows .append (row )
457+
458+ # analytics relationship table rows
459+ for analytic_id in detection_strategy .get ("x_mitre_analytic_refs" , []):
460+ analytic_obj = src .get (analytic_id )
461+
462+ rel_rows .append (
463+ {
464+ "detection_strategy_attack_id" : detection_strategy ["external_references" ][0 ]["external_id" ],
465+ "detection_strategy_id" : detection_strategy ["id" ],
466+ "detection_strategy_name" : detection_strategy .get ("name" , "" ),
467+ "analytic_id" : analytic_id ,
468+ "analytic_name" : analytic_obj ["external_references" ][0 ]["external_id" ],
469+ "platforms" : ", " .join (sorted (analytic_obj .get ("x_mitre_platforms" , []))),
470+ }
471+ )
472+
473+ # Build main dataframes
474+ dataframes ["detectionstrategies" ] = pd .DataFrame (detection_strategy_rows ).sort_values ("name" )
403475
404476 citations = get_citations (detection_strategies )
405- dataframes = {
406- "detectionstrategies" : pd .DataFrame (detection_strategy_rows ).sort_values ("name" ),
407- }
408477 if not citations .empty :
409- if "citations" in dataframes : # append to existing citations from references
410- dataframes ["citations" ] = citations .sort_values ("reference" )
478+ dataframes ["citations" ] = citations .sort_values ("reference" )
411479
412480 else :
413481 logger .warning ("No detection strategies found - nothing to parse" )
414-
415482 return dataframes
416483
417484
@@ -461,6 +528,50 @@ def softwareToDf(src):
461528 return dataframes
462529
463530
def _external_attack_id(stix_obj):
    """Return the ``external_id`` of the object's first external reference.

    Returns an empty string when the object has no external references or the
    first reference carries no ``external_id``.
    """
    refs = stix_obj.get("external_references") or [{}]
    return refs[0].get("external_id", "")


def detectionStrategiesAnalyticsLogSourcesDf(src):
    """Build a single DS -> LogSource -> Analytic dataframe directly from STIX.

    One row is emitted per (detection strategy, analytic, log-source reference)
    combination. Analytics without any log-source reference contribute no rows.

    Parameters
    ----------
    src : object
        Data store exposing ``query(filters)`` and ``get(stix_id)``
        (presumably a stix2 ``MemoryStore`` -- confirm against callers).

    Returns
    -------
    pd.DataFrame
        Columns: ``detection_strategy_attack_id``, ``detection_strategy_id``,
        ``detection_strategy_name``, ``analytic_id``, ``analytic_name``,
        ``platforms``, ``log_source_name``, ``channel``, ``data_component_id``,
        ``data_component_name``, ``data_component_attack_id``.
        The frame has no columns when no rows are produced.
    """
    detection_strategies = remove_revoked_deprecated(
        src.query([Filter("type", "=", "x-mitre-detection-strategy")])
    )
    analytics = remove_revoked_deprecated(
        src.query([Filter("type", "=", "x-mitre-analytic")])
    )
    analytics_by_id = {a["id"]: a for a in analytics}

    # The same data component is typically referenced by many log sources;
    # resolve each one against the store only once.
    data_components = {}

    rows = []
    for ds in detection_strategies:
        ds_attack_id = _external_attack_id(ds)
        ds_id = ds.get("id", "")
        ds_name = ds.get("name", "")

        for analytic_id in ds.get("x_mitre_analytic_refs", []):
            analytic = analytics_by_id.get(analytic_id)
            if analytic is None:
                # The reference points at an analytic that was filtered out as
                # revoked/deprecated (or is absent from the store); it has no
                # row to contribute, so skip it instead of crashing.
                continue

            # NOTE: the "analytic_name" column carries the analytic's ATT&CK
            # external ID, not its "name" property.
            analytic_attack_id = _external_attack_id(analytic)
            platforms = ", ".join(sorted(analytic.get("x_mitre_platforms", [])))

            for logsrc in analytic.get("x_mitre_log_source_references", []):
                data_comp_id = logsrc.get("x_mitre_data_component_ref", "")
                if data_comp_id not in data_components:
                    data_components[data_comp_id] = src.get(data_comp_id) if data_comp_id else None
                data_comp = data_components[data_comp_id]

                # An unresolved data component yields empty name/ID cells
                # rather than aborting the whole export.
                if data_comp is None:
                    data_comp_name = ""
                    data_comp_attack_id = ""
                else:
                    data_comp_name = data_comp.get("name", "")
                    data_comp_attack_id = _external_attack_id(data_comp)

                rows.append(
                    {
                        "detection_strategy_attack_id": ds_attack_id,
                        "detection_strategy_id": ds_id,
                        "detection_strategy_name": ds_name,
                        "analytic_id": analytic_id,
                        "analytic_name": analytic_attack_id,
                        "platforms": platforms,
                        "log_source_name": logsrc.get("name", ""),
                        "channel": logsrc.get("channel", ""),
                        "data_component_id": data_comp_id,
                        "data_component_name": data_comp_name,
                        "data_component_attack_id": data_comp_attack_id,
                    }
                )

    return pd.DataFrame(rows)
573+
574+
464575def groupsToDf (src ):
465576 """Parse STIX groups from the given data and return corresponding pandas dataframes.
466577
0 commit comments