ConnectionMaster
diff --git a/‎nutanix/README.md‎
Lines changed: 4 additions & 0 deletions b/‎nutanix/README.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎nutanix/assets/configuration/spec.yaml‎
Lines changed: 11 additions & 0 deletions b/‎nutanix/assets/configuration/spec.yaml‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎nutanix/changelog.d/22997.added‎
Lines changed: 1 addition & 0 deletions b/‎nutanix/changelog.d/22997.added‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎nutanix/datadog_checks/nutanix/activity_monitor.py‎
Lines changed: 46 additions & 56 deletions b/‎nutanix/datadog_checks/nutanix/activity_monitor.py‎
Lines changed: 46 additions & 56 deletions
diff --git a/‎nutanix/datadog_checks/nutanix/check.py‎
Lines changed: 19 additions & 13 deletions b/‎nutanix/datadog_checks/nutanix/check.py‎
Lines changed: 19 additions & 13 deletions
diff --git a/‎nutanix/datadog_checks/nutanix/config_models/defaults.py‎
Lines changed: 4 additions & 0 deletions b/‎nutanix/datadog_checks/nutanix/config_models/defaults.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎nutanix/datadog_checks/nutanix/config_models/instance.py‎
Lines changed: 1 addition & 0 deletions b/‎nutanix/datadog_checks/nutanix/config_models/instance.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎nutanix/datadog_checks/nutanix/data/conf.yaml.example‎
Lines changed: 10 additions & 0 deletions b/‎nutanix/datadog_checks/nutanix/data/conf.yaml.example‎
Lines changed: 10 additions & 0 deletions
@@ -101,6 +101,10 @@ resource_filters:
 
 Category tags use the Nutanix category key as the tag name (e.g., `Environment:Production`). Set `prefix_category_tags: true` to prefix them with `ntnx_` (e.g., `ntnx_Environment:Production`) to avoid collisions with existing Datadog tags.
 
+### Cluster capacity planning
+
+Cluster-level capacity metrics (such as `cluster.cpu.total_cores`, `cluster.cpu.vcpus_allocated`, and `cluster.memory.allocated_bytes`) aggregate resources from all hosts and VMs in the cluster. By default, every host and VM contributes to these metrics, even if it is excluded from individual metric collection by power state or `resource_filters`, so the metrics give a complete view of provisioned capacity. Set `exclude_filtered_resources_from_cluster_capacity: true` to count only the resources that pass the filter checks.
+
 ### Duplicate hostnames
 
 The Nutanix API does not expose the real hostname of VMs. VM metrics use the VM name from Prism Central as the hostname. If the Datadog Agent is installed on a Nutanix VM, its auto-detected hostname may differ from the VM name, causing duplicate hosts in Datadog. To fix this, set `hostname` in `datadog.yaml` (or the `DD_HOSTNAME` environment variable) to match the VM name in Prism Central.
 
@@ -191,6 +191,17 @@ files:
             value:
               type: boolean
               example: false
+          - name: exclude_filtered_resources_from_cluster_capacity
+            description: |
+              Whether to exclude filtered resources (hosts and VMs excluded by power state or
+              resource_filters) from cluster-level capacity metrics.
+              When false (default), cluster capacity reflects total provisioned resources across
+              all hosts and VMs, regardless of whether individual metrics are reported.
+              When true, only resources that pass filter checks contribute to cluster capacity
+              metrics (total_cores, total_threads, total_bytes, vcpus_allocated, memory_allocated_bytes).
+            value:
+              type: boolean
+              example: false
           - name: batch_vm_collection
             description: |
               Whether to fetch all VMs in a single paginated API call instead of per-host.
 
@@ -0,0 +1 @@
+Add `exclude_filtered_resources_from_cluster_capacity` option to control whether filtered resources contribute to cluster capacity metrics.
@@ -27,6 +27,7 @@ def __missing__(self, key):
 class ActivityMonitor:
     def __init__(self, check: NutanixCheck):
         self.check = check
+        self._pc_label = f"PC:{self.check.pc_ip}:{self.check.pc_port}"
         self.last_event_collection_time = self.check.read_persistent_cache("last_event_collection_time")
         self.last_task_collection_time = self.check.read_persistent_cache("last_task_collection_time")
         self.last_audit_collection_time = self.check.read_persistent_cache("last_audit_collection_time")
@@ -36,6 +37,11 @@ def __init__(self, check: NutanixCheck):
         self.audits: dict[str, dict] = {}
         self.alerts: dict[str, dict] = {}
         self.tasks: dict[str, dict] = {}
+        # Entity counters
+        self.events_count = 0
+        self.tasks_count = 0
+        self.audits_count = 0
+        self.alerts_count = 0
         # Read boolean flag from cache (stored as string)
         cached_value = self.check.read_persistent_cache("alerts_v42_supported")
         if cached_value == "True":
@@ -46,11 +52,15 @@ def __init__(self, check: NutanixCheck):
             self.alerts_v42_supported = None
 
     def reset_state(self) -> None:
-        """Reset in-memory caches for a new collection run."""
+        """Reset in-memory caches and counters for a new collection run."""
         self.events = {}
         self.audits = {}
         self.alerts = {}
         self.tasks = {}
+        self.events_count = 0
+        self.tasks_count = 0
+        self.audits_count = 0
+        self.alerts_count = 0
 
     def _collect(
         self,
@@ -68,24 +78,18 @@ def _collect(
             now = get_current_datetime()
             start_time = (now - timedelta(seconds=self.check.sampling_interval)).isoformat().replace("+00:00", "Z")
 
-        self.check.log.debug(
-            "[PC:%s:%s] Collecting %ss since: %s", self.check.pc_ip, self.check.pc_port, activity_kind, start_time
-        )
+        self.check.log.debug("[%s] Collecting %ss since: %s", self._pc_label, activity_kind, start_time)
 
         items = list_fn(start_time)
         if not items:
-            self.check.log.debug("[PC:%s:%s] No %ss found", self.check.pc_ip, self.check.pc_port, activity_kind)
+            self.check.log.debug("[%s] No %ss found", self._pc_label, activity_kind)
             return 0
 
-        self.check.log.debug(
-            "[PC:%s:%s] Fetched %d %ss from API", self.check.pc_ip, self.check.pc_port, len(items), activity_kind
-        )
+        self.check.log.debug("[%s] Fetched %d %ss from API", self._pc_label, len(items), activity_kind)
 
         items = self._filter_after_time(items, last_time, time_field)
         if not items:
-            self.check.log.debug(
-                "[PC:%s:%s] No new %ss after filtering", self.check.pc_ip, self.check.pc_port, activity_kind
-            )
+            self.check.log.debug("[%s] No new %ss after filtering", self._pc_label, activity_kind)
             return 0
 
         # Advance past all fetched items before applying resource filters
@@ -103,9 +107,8 @@ def _collect(
                 cache[ext_id] = item
 
         self.check.log.debug(
-            "[PC:%s:%s] Processing %d %ss after filtering",
-            self.check.pc_ip,
-            self.check.pc_port,
+            "[%s] Processing %d %ss after filtering",
+            self._pc_label,
             len(items),
             activity_kind,
         )
@@ -117,9 +120,8 @@ def _collect(
             setattr(self, cache_key, most_recent_time_str)
             self.check.write_persistent_cache(cache_key, most_recent_time_str)
             self.check.log.debug(
-                "[PC:%s:%s] Updated %s to: %s",
-                self.check.pc_ip,
-                self.check.pc_port,
+                "[%s] Updated %s to: %s",
+                self._pc_label,
                 cache_key,
                 most_recent_time_str,
             )
@@ -132,24 +134,22 @@ def _safe_collect(self, activity_kind: str, collect_fn: Callable[[], int]) -> in
             return collect_fn()
         except HTTPError as e:
             self.check.log.error(
-                "[PC:%s:%s] Failed to collect %ss: HTTP %s",
-                self.check.pc_ip,
-                self.check.pc_port,
+                "[%s] Failed to collect %ss: HTTP %s",
+                self._pc_label,
                 activity_kind,
                 e.response.status_code if e.response else "error",
             )
             return 0
         except Exception:
             self.check.log.exception(
-                "[PC:%s:%s] Unexpected error collecting %ss",
-                self.check.pc_ip,
-                self.check.pc_port,
+                "[%s] Unexpected error collecting %ss",
+                self._pc_label,
                 activity_kind,
             )
             return 0
 
-    def collect_events(self) -> int:
-        return self._safe_collect(
+    def collect_events(self) -> None:
+        self.events_count = self._safe_collect(
             "event",
             lambda: self._collect(
                 activity_kind="event",
@@ -160,13 +160,13 @@ def collect_events(self) -> int:
             ),
         )
 
-    def collect_tasks(self) -> int:
+    def collect_tasks(self) -> None:
         def _filter_subtasks(tasks: list[dict]) -> list[dict]:
             if not self.check.collect_subtasks_enabled:
                 return [t for t in tasks if not t.get("parentTask")]
             return tasks
 
-        return self._safe_collect(
+        self.tasks_count = self._safe_collect(
             "task",
             lambda: self._collect(
                 activity_kind="task",
@@ -178,8 +178,8 @@ def _filter_subtasks(tasks: list[dict]) -> list[dict]:
             ),
         )
 
-    def collect_audits(self) -> int:
-        return self._safe_collect(
+    def collect_audits(self) -> None:
+        self.audits_count = self._safe_collect(
             "audit",
             lambda: self._collect(
                 activity_kind="audit",
@@ -190,8 +190,8 @@ def collect_audits(self) -> int:
             ),
         )
 
-    def collect_alerts(self) -> int:
-        return self._safe_collect(
+    def collect_alerts(self) -> None:
+        self.alerts_count = self._safe_collect(
             "alert",
             lambda: self._collect(
                 activity_kind="alert",
@@ -218,30 +218,26 @@ def _list_alerts(self, start_time_str: str) -> list[dict]:
         }
 
         if self.alerts_v42_supported is False:
-            self.check.log.debug(
-                "[PC:%s:%s] Using alerts API v4.0 (v4.2 not supported)", self.check.pc_ip, self.check.pc_port
-            )
+            self.check.log.debug("[%s] Using alerts API v4.0 (v4.2 not supported)", self._pc_label)
             del params["$filter"]
             return self.check._get_paginated_request_data("api/monitoring/v4.0/serviceability/alerts", params=params)
 
         try:
-            self.check.log.debug("[PC:%s:%s] Attempting to use alerts API v4.2", self.check.pc_ip, self.check.pc_port)
+            self.check.log.debug("[%s] Attempting to use alerts API v4.2", self._pc_label)
             result = self.check._get_paginated_request_data("api/monitoring/v4.2/serviceability/alerts", params=params)
             if self.alerts_v42_supported is None:
                 self.check.log.debug(
-                    "[PC:%s:%s] Alerts API v4.2 is supported, caching for future use",
-                    self.check.pc_ip,
-                    self.check.pc_port,
+                    "[%s] Alerts API v4.2 is supported, caching for future use",
+                    self._pc_label,
                 )
                 self.alerts_v42_supported = True
                 self.check.write_persistent_cache("alerts_v42_supported", "True")
             return result
         except HTTPError as e:
             if e.response is not None and e.response.status_code == 404:
                 self.check.log.debug(
-                    "[PC:%s:%s] Alerts API v4.2 not supported, falling back to v4.0 permanently",
-                    self.check.pc_ip,
-                    self.check.pc_port,
+                    "[%s] Alerts API v4.2 not supported, falling back to v4.0 permanently",
+                    self._pc_label,
                 )
                 self.alerts_v42_supported = False
                 self.check.write_persistent_cache("alerts_v42_supported", "False")
@@ -261,28 +257,25 @@ def _get_alert(self, alert_ext_id: str) -> dict | None:
             endpoint = "api/monitoring/v4.2/serviceability/alerts"
 
         self.check.log.debug(
-            "[PC:%s:%s] Alert %s not in cache, fetching from API",
-            self.check.pc_ip,
-            self.check.pc_port,
+            "[%s] Alert %s not in cache, fetching from API",
+            self._pc_label,
             alert_ext_id,
         )
         try:
             alert = self.check._get_request_data(f"{endpoint}/{alert_ext_id}")
             if alert:
                 self.alerts[alert_ext_id] = alert
                 self.check.log.debug(
-                    "[PC:%s:%s] Fetched alert %s: %s",
-                    self.check.pc_ip,
-                    self.check.pc_port,
+                    "[%s] Fetched alert %s: %s",
+                    self._pc_label,
                     alert_ext_id,
                     alert.get("title", ""),
                 )
             return alert
         except Exception as e:
             self.check.log.debug(
-                "[PC:%s:%s] Failed to fetch alert %s: %s",
-                self.check.pc_ip,
-                self.check.pc_port,
+                "[%s] Failed to fetch alert %s: %s",
+                self._pc_label,
                 alert_ext_id,
                 e,
             )
@@ -341,9 +334,8 @@ def _process_audit(self, audit: dict) -> None:
 
         # Log audit submission for duplicate debugging
         self.check.log.debug(
-            "[PC:%s:%s]%s Submitting audit - ID: %s, CreationTime: %s",
-            self.check.pc_ip,
-            self.check.pc_port,
+            "[%s]%s Submitting audit - ID: %s, CreationTime: %s",
+            self._pc_label,
             cluster_label,
             audit_id,
             audit.get("creationTime", "unknown"),
@@ -547,9 +539,7 @@ def _parse_iso(self, timestamp_str: str) -> datetime | None:
         try:
             return datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
         except (ValueError, AttributeError):
-            self.check.log.warning(
-                "[PC:%s:%s] Failed to parse timestamp: %s", self.check.pc_ip, self.check.pc_port, timestamp_str
-            )
+            self.check.log.warning("[%s] Failed to parse timestamp: %s", self._pc_label, timestamp_str)
             return None
 
     def _parse_timestamp(self, timestamp_str: str) -> int | None:
 
@@ -60,6 +60,10 @@ def _parse_config(self):
 
         self.batch_vm_collection = is_affirmative(self.instance.get("batch_vm_collection", True))
 
+        self.exclude_filtered_resources_from_cluster_capacity = is_affirmative(
+            self.instance.get("exclude_filtered_resources_from_cluster_capacity", False)
+        )
+
         self.prefix_category_tags = is_affirmative(self.instance.get("prefix_category_tags", False))
 
         self.resource_filters = parse_resource_filters(self.instance.get("resource_filters") or [], self.log)
@@ -144,9 +148,8 @@ def check(self, _):
         self.activity_monitor.reset_state()
 
         if not self._check_health():
-            self.log.warning("[PC:%s:%s] Health check failed, aborting", self.pc_ip, self.pc_port)
+            self.log.error("[PC:%s:%s] Health check failed, aborting", self.pc_ip, self.pc_port)
             return
-
         self.infrastructure_monitor.init_collection_time_window()
         start_time, end_time = self.infrastructure_monitor.collection_time_window
         window_seconds = (datetime.fromisoformat(end_time) - datetime.fromisoformat(start_time)).total_seconds()
@@ -162,7 +165,7 @@ def check(self, _):
 
         self.infrastructure_monitor.collect_cluster_metrics()
 
-        events_count, tasks_count, audits_count, alerts_count = self._collect_activity()
+        self._collect_activity()
 
         if self.infrastructure_monitor.external_tags:
             self.set_external_tags(self.infrastructure_monitor.external_tags)
@@ -173,19 +176,22 @@ def check(self, _):
             self.infrastructure_monitor.cluster_count,
             self.infrastructure_monitor.host_count,
             self.infrastructure_monitor.vm_count,
-            events_count,
-            tasks_count,
-            audits_count,
-            alerts_count,
+            self.activity_monitor.events_count,
+            self.activity_monitor.tasks_count,
+            self.activity_monitor.audits_count,
+            self.activity_monitor.alerts_count,
         )
 
-    def _collect_activity(self) -> tuple[int, int, int, int]:
+    def _collect_activity(self) -> None:
         """Collect events, tasks, audits, and alerts if enabled."""
-        events_count = self.activity_monitor.collect_events() if self.collect_events_enabled else 0
-        alerts_count = self.activity_monitor.collect_alerts() if self.collect_alerts_enabled else 0
-        tasks_count = self.activity_monitor.collect_tasks() if self.collect_tasks_enabled else 0
-        audits_count = self.activity_monitor.collect_audits() if self.collect_audits_enabled else 0
-        return events_count, tasks_count, audits_count, alerts_count
+        if self.collect_events_enabled:
+            self.activity_monitor.collect_events()
+        if self.collect_alerts_enabled:
+            self.activity_monitor.collect_alerts()
+        if self.collect_tasks_enabled:
+            self.activity_monitor.collect_tasks()
+        if self.collect_audits_enabled:
+            self.activity_monitor.collect_audits()
 
     def _check_health(self):
         try:
 
@@ -52,6 +52,10 @@ def instance_enable_legacy_tags_normalization():
     return True
 
 
+def instance_exclude_filtered_resources_from_cluster_capacity():
+    return False
+
+
 def instance_kerberos_auth():
     return 'disabled'
 
 
@@ -87,6 +87,7 @@ class InstanceConfig(BaseModel):
     disable_generic_tags: Optional[bool] = None
     empty_default_hostname: Optional[bool] = None
     enable_legacy_tags_normalization: Optional[bool] = None
+    exclude_filtered_resources_from_cluster_capacity: Optional[bool] = None
     extra_headers: Optional[MappingProxyType[str, Any]] = None
     headers: Optional[MappingProxyType[str, Any]] = None
     kerberos_auth: Optional[Literal['required', 'optional', 'disabled']] = None
 
@@ -167,6 +167,16 @@ instances:
     #
     # collect_subtasks: false
 
+    ## @param exclude_filtered_resources_from_cluster_capacity - boolean - optional - default: false
+    ## Whether to exclude filtered resources (hosts and VMs excluded by power state or
+    ## resource_filters) from cluster-level capacity metrics.
+    ## When false (default), cluster capacity reflects total provisioned resources across
+    ## all hosts and VMs, regardless of whether individual metrics are reported.
+    ## When true, only resources that pass filter checks contribute to cluster capacity
+    ## metrics (total_cores, total_threads, total_bytes, vcpus_allocated, memory_allocated_bytes).
+    #
+    # exclude_filtered_resources_from_cluster_capacity: false
+
     ## @param batch_vm_collection - boolean - optional - default: true
     ## Whether to fetch all VMs in a single paginated API call instead of per-host.
     ## When true, VMs are fetched once and grouped by host in-memory, significantly
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+Add `exclude_filtered_resources_from_cluster_capacity` option to control whether filtered resources contribute to cluster capacity metrics.