2424from tqdm import tqdm
2525
2626from mitreattack import release_info
27+ from mitreattack .stix20 import MitreAttackData
2728
2829# explanation of modification types to data objects for legend in layer files
2930date = datetime .datetime .today ()
@@ -47,6 +48,53 @@ def __repr__(self):
4748 """Return a string representation of the ATT&CK object version."""
4849 return f"{ self .major } .{ self .minor } "
4950
@dataclass
class DomainStatistics:
    """Statistics for a single ATT&CK domain.

    Attributes
    ----------
    name : str
        Display name of the domain (e.g., "Enterprise", "Mobile", "ICS").
    tactics : int
        Number of tactics.
    techniques : int
        Number of techniques.
    subtechniques : int
        Number of sub-techniques.
    groups : int
        Number of groups.
    software : int
        Number of pieces of software.
    campaigns : int
        Number of campaigns.
    mitigations : int
        Number of mitigations.
    datasources : int
        Number of data sources.
    assets : int
        Number of assets. Defaults to 0.
    """

    name: str
    tactics: int
    techniques: int
    subtechniques: int
    groups: int
    software: int
    campaigns: int
    mitigations: int
    datasources: int
    assets: int = 0

    def format_output(self) -> str:
        """
        Format domain statistics as a single markdown list item.

        Object types whose count is not greater than zero are omitted. The
        remaining entries are joined as an English list: one entry is shown
        alone, two entries are joined with "and", and three or more are
        comma-separated with a serial comma before the final "and".

        Returns
        -------
        str
            Formatted statistics string for display, e.g.
            ``"- ICS: 12 Tactics, 83 Techniques, and 14 Assets"``, or
            ``"- <name>: No objects"`` when every count is zero.
        """
        # Display order of the statistics; assets are listed before data sources.
        stats = [
            (self.tactics, "Tactics"),
            (self.techniques, "Techniques"),
            (self.subtechniques, "Sub-Techniques"),
            (self.groups, "Groups"),
            (self.software, "Pieces of Software"),
            (self.campaigns, "Campaigns"),
            (self.mitigations, "Mitigations"),
            (self.assets, "Assets"),
            (self.datasources, "Data Sources"),
        ]

        # Only object types that actually occur in the domain are listed.
        parts = [f"{count} {label}" for count, label in stats if count > 0]

        if not parts:
            summary = "No objects"
        elif len(parts) <= 2:
            # A list of two takes no comma: "A and B", not "A, and B".
            summary = " and ".join(parts)
        else:
            summary = f"{', '.join(parts[:-1])}, and {parts[-1]}"

        return f"- {self.name}: {summary}"
5098
5199# TODO: Implement a custom decoder as well. Possible solution at this link
52100# https://alexisgomes19.medium.com/custom-json-encoder-with-python-f52c91b48cd2
@@ -1002,6 +1050,144 @@ def placard(self, stix_object: dict, section: str, domain: str) -> str:
10021050 full_placard_string = f"{ placard_string } { version_string } "
10031051 return full_placard_string
10041052
def _collect_domain_statistics(self, datastore: MemoryStore, domain_name: str) -> DomainStatistics:
    """
    Collect statistics for a single domain from a STIX datastore.

    Revoked and deprecated objects are excluded from every count. Data
    sources and assets are counted on a best-effort basis: if the lookup
    raises, the failure is logged as a warning and the count is reported
    as 0 instead of aborting the whole statistics run.

    Parameters
    ----------
    datastore : MemoryStore
        The STIX MemoryStore containing the domain data.
    domain_name : str
        Display name of the domain (e.g., "Enterprise", "Mobile", "ICS").
        Assets are only counted when this is exactly "ICS".

    Returns
    -------
    DomainStatistics
        Statistics for the domain.
    """
    # Create MitreAttackData instance from the datastore
    data = MitreAttackData(src=datastore)

    # Get all object types, removing revoked and deprecated
    tactics = data.get_tactics(remove_revoked_deprecated=True)
    techniques = data.get_techniques(include_subtechniques=False, remove_revoked_deprecated=True)
    subtechniques = data.get_subtechniques(remove_revoked_deprecated=True)
    groups = data.get_groups(remove_revoked_deprecated=True)
    software = data.get_software(remove_revoked_deprecated=True)
    campaigns = data.get_campaigns(remove_revoked_deprecated=True)
    mitigations = data.get_mitigations(remove_revoked_deprecated=True)

    # Data sources are best-effort: the lookup may fail on test data with
    # STIX version mismatches. Record the failure instead of hiding it, so a
    # reported count of 0 can be traced back to its cause.
    try:
        datasources = data.get_datasources(remove_revoked_deprecated=True)
    except Exception as exc:
        logger.warning(f"Could not count data sources for {domain_name}; reporting 0: {exc}")
        datasources = []

    # ICS domain has assets; same best-effort policy as data sources.
    assets = 0
    if domain_name == "ICS":
        try:
            assets = len(data.get_assets(remove_revoked_deprecated=True))
        except Exception as exc:
            logger.warning(f"Could not count assets for {domain_name}; reporting 0: {exc}")

    return DomainStatistics(
        name=domain_name,
        tactics=len(tactics),
        techniques=len(techniques),
        subtechniques=len(subtechniques),
        groups=len(groups),
        software=len(software),
        campaigns=len(campaigns),
        mitigations=len(mitigations),
        datasources=len(datasources),
        assets=assets,
    )
1110+
def _collect_unique_object_counts(self, datastore_version: str) -> dict[str, int]:
    """
    Count distinct software, groups, and campaigns across every domain.

    Objects of these types may be present in more than one domain, so the
    STIX IDs are pooled into one set per type before counting; an object
    shared between domains is therefore counted once. Revoked and
    deprecated objects are excluded.

    Parameters
    ----------
    datastore_version : str
        Either "old" or "new" to specify which version's data to analyze.

    Returns
    -------
    dict of str to int
        Unique object counts under the keys "software", "groups", and
        "campaigns".
    """
    # One pool of STIX IDs per object type, shared by all domains.
    unique_ids = {"software": set(), "groups": set(), "campaigns": set()}

    for domain in self.domains:
        attack_data = MitreAttackData(src=self.data[datastore_version][domain]["stix_datastore"])

        objects_by_type = {
            "software": attack_data.get_software(remove_revoked_deprecated=True),
            "groups": attack_data.get_groups(remove_revoked_deprecated=True),
            "campaigns": attack_data.get_campaigns(remove_revoked_deprecated=True),
        }

        for type_key, stix_objects in objects_by_type.items():
            unique_ids[type_key].update(stix_object["id"] for stix_object in stix_objects)

    return {type_key: len(ids) for type_key, ids in unique_ids.items()}
1149+
def get_statistics_section(self, datastore_version: str = "new") -> str:
    """
    Build the markdown statistics section covering every ATT&CK domain.

    The section consists of a heading, one sentence with the cross-domain
    unique counts of software, groups, and campaigns, and a list with one
    formatted line per domain, followed by a blank line.

    Parameters
    ----------
    datastore_version : str, optional
        Either "old" or "new" to specify which version's statistics to generate.
        Defaults to "new". Any value other than "new" is labelled "Old" in
        the heading.

    Returns
    -------
    str
        Markdown-formatted statistics section.
    """
    # Cross-domain totals first, then the per-domain breakdown.
    totals = self._collect_unique_object_counts(datastore_version)

    per_domain = [
        self._collect_domain_statistics(
            self.data[datastore_version][domain]["stix_datastore"],
            self.domain_to_domain_label[domain],
        )
        for domain in self.domains
    ]

    if datastore_version == "new":
        version_label = "New"
    else:
        version_label = "Old"

    # Assemble the section as pieces and join once at the end.
    pieces = [
        f"## {version_label} ATT&CK Version Statistics\n\n",
        f"This version of ATT&CK contains {totals['software']} Pieces of Software, "
        f"{totals['groups']} Groups, and {totals['campaigns']} Campaigns.\n\n",
        "Broken out by domain:\n\n",
    ]
    pieces.extend(domain_entry.format_output() + "\n" for domain_entry in per_domain)
    pieces.append("\n")

    return "".join(pieces)
1190+
10051191 def get_markdown_section_data (self , groupings , section : str , domain : str ) -> str :
10061192 """Parse a list of STIX objects in a section and return a string for the whole section."""
10071193 sectionString = ""
@@ -1056,6 +1242,11 @@ def get_markdown_string(self) -> str:
10561242 key_content = self .get_md_key ()
10571243 content = f"{ key_content } \n \n "
10581244
1245+ # Add statistics section for the new version
1246+ logger .info ("Generating statistics section" )
1247+ stats_section = self .get_statistics_section (datastore_version = "new" )
1248+ content += stats_section
1249+
10591250 for object_type in self .types :
10601251 domains = ""
10611252
0 commit comments