merge and address comments

ShreyeshArangath · ShreyeshArangath · commit a8623c2c48ab · 2026-04-01T14:23:54.000-07:00
diff --git a/crates/core/src/metrics.rs b/crates/core/src/metrics.rs
@@ -18,7 +18,7 @@
 use std::collections::HashMap;
 use std::sync::Arc;
 
-use datafusion::physical_plan::metrics::{MetricValue, MetricsSet, Metric};
+use datafusion::physical_plan::metrics::{MetricValue, MetricsSet, Metric, Timestamp};
 use pyo3::prelude::*;
 
 #[pyclass(frozen, name = "MetricsSet", module = "datafusion")]
@@ -81,6 +81,32 @@ impl PyMetric {
     pub fn new(metric: Arc<Metric>) -> Self {
         Self { metric }
     }
+
+    fn timestamp_to_pyobject<'py>(
+        py: Python<'py>,
+        ts: &Timestamp,
+    ) -> PyResult<Option<Bound<'py, PyAny>>> {
+        match ts.value() {
+            Some(dt) => {
+                let nanos = dt.timestamp_nanos_opt().ok_or_else(|| {
+                    PyErr::new::<pyo3::exceptions::PyOverflowError, _>(
+                        "timestamp out of range",
+                    )
+                })?;
+                let datetime_mod = py.import("datetime")?;
+                let datetime_cls = datetime_mod.getattr("datetime")?;
+                let tz_utc = datetime_mod.getattr("timezone")?.getattr("utc")?;
+                let secs = nanos / 1_000_000_000;
+                let micros = (nanos % 1_000_000_000) / 1_000;
+                let result = datetime_cls.call_method1(
+                    "fromtimestamp",
+                    (secs as f64 + micros as f64 / 1_000_000.0, tz_utc),
+                )?;
+                Ok(Some(result))
+            }
+            None => Ok(None),
+        }
+    }
 }
 
 #[pymethods]
@@ -90,62 +116,30 @@ impl PyMetric {
         self.metric.value().name().to_string()
     }
 
-    /// Returns the numeric value of this metric as a `usize`, or `None` when the
-    /// value is not representable as an integer.
-    ///
-    /// # Note
-    /// `StartTimestamp` and `EndTimestamp` metrics are returned as nanoseconds
-    /// since the Unix epoch (via `timestamp_nanos_opt`), which may overflow
-    /// a `usize` on 32-bit platforms or return `None` if the timestamp is out
-    /// of range.  Non-numeric metric variants (unrecognised future variants)
-    /// also return `None`.
     #[getter]
-    fn value(&self) -> Option<usize> {
+    fn value<'py>(&self, py: Python<'py>) -> PyResult<Option<Bound<'py, PyAny>>> {
         match self.metric.value() {
-            MetricValue::OutputRows(c) => Some(c.value()),
-            MetricValue::OutputBytes(c) => Some(c.value()),
-            MetricValue::ElapsedCompute(t) => Some(t.value()),
-            MetricValue::SpillCount(c) => Some(c.value()),
-            MetricValue::SpilledBytes(c) => Some(c.value()),
-            MetricValue::SpilledRows(c) => Some(c.value()),
-            MetricValue::CurrentMemoryUsage(g) => Some(g.value()),
-            MetricValue::Count { count, .. } => Some(count.value()),
-            MetricValue::Gauge { gauge, .. } => Some(gauge.value()),
-            MetricValue::Time { time, .. } => Some(time.value()),
-            MetricValue::StartTimestamp(ts) => {
-                ts.value().and_then(|dt| dt.timestamp_nanos_opt().map(|n| n as usize))
-            }
-            MetricValue::EndTimestamp(ts) => {
-                ts.value().and_then(|dt| dt.timestamp_nanos_opt().map(|n| n as usize))
+            MetricValue::OutputRows(c) => Ok(Some(c.value().into_pyobject(py)?.into_any())),
+            MetricValue::OutputBytes(c) => Ok(Some(c.value().into_pyobject(py)?.into_any())),
+            MetricValue::ElapsedCompute(t) => Ok(Some(t.value().into_pyobject(py)?.into_any())),
+            MetricValue::SpillCount(c) => Ok(Some(c.value().into_pyobject(py)?.into_any())),
+            MetricValue::SpilledBytes(c) => Ok(Some(c.value().into_pyobject(py)?.into_any())),
+            MetricValue::SpilledRows(c) => Ok(Some(c.value().into_pyobject(py)?.into_any())),
+            MetricValue::CurrentMemoryUsage(g) => Ok(Some(g.value().into_pyobject(py)?.into_any())),
+            MetricValue::Count { count, .. } => Ok(Some(count.value().into_pyobject(py)?.into_any())),
+            MetricValue::Gauge { gauge, .. } => Ok(Some(gauge.value().into_pyobject(py)?.into_any())),
+            MetricValue::Time { time, .. } => Ok(Some(time.value().into_pyobject(py)?.into_any())),
+            MetricValue::StartTimestamp(ts) | MetricValue::EndTimestamp(ts) => {
+                Self::timestamp_to_pyobject(py, ts)
             }
-            _ => None,
+            _ => Ok(None),
         }
     }
 
-    /// Returns the value as a Python `datetime` for `StartTimestamp` / `EndTimestamp`
-    /// metrics, or `None` for all other metric types.
     fn value_as_datetime<'py>(&self, py: Python<'py>) -> PyResult<Option<Bound<'py, PyAny>>> {
         match self.metric.value() {
             MetricValue::StartTimestamp(ts) | MetricValue::EndTimestamp(ts) => {
-                match ts.value() {
-                    Some(dt) => {
-                        let nanos = dt.timestamp_nanos_opt()
-                            .ok_or_else(|| PyErr::new::<pyo3::exceptions::PyOverflowError, _>(
-                                "timestamp out of range"
-                            ))?;
-                        let datetime_mod = py.import("datetime")?;
-                        let datetime_cls = datetime_mod.getattr("datetime")?;
-                        let tz_utc = datetime_mod.getattr("timezone")?.getattr("utc")?;
-                        let secs = nanos / 1_000_000_000;
-                        let micros = (nanos % 1_000_000_000) / 1_000;
-                        let result = datetime_cls.call_method1(
-                            "fromtimestamp",
-                            (secs as f64 + micros as f64 / 1_000_000.0, tz_utc),
-                        )?;
-                        Ok(Some(result))
-                    }
-                    None => Ok(None),
-                }
+                Self::timestamp_to_pyobject(py, ts)
             }
             _ => Ok(None),
         }
diff --git a/crates/core/src/physical_plan.rs b/crates/core/src/physical_plan.rs
@@ -97,7 +97,6 @@ impl PyExecutionPlan {
         Ok(Self::new(plan))
     }
 
-    /// Returns metrics for this plan node after execution, or None if unavailable.
     pub fn metrics(&self) -> Option<PyMetricsSet> {
         self.plan.metrics().map(PyMetricsSet::new)
     }
diff --git a/docs/source/user-guide/dataframe/execution-metrics.rst b/docs/source/user-guide/dataframe/execution-metrics.rst
@@ -53,11 +53,27 @@ Execution is triggered by any of the terminal operations:
 - :py:meth:`~datafusion.DataFrame.collect`
 - :py:meth:`~datafusion.DataFrame.collect_partitioned`
 - :py:meth:`~datafusion.DataFrame.execute_stream`
+  (metrics are available once the stream has been fully consumed)
 - :py:meth:`~datafusion.DataFrame.execute_stream_partitioned`
+  (metrics are available once all partition streams have been fully consumed)
 
 Calling :py:meth:`~datafusion.ExecutionPlan.collect_metrics` before execution
-will return entries with empty (or ``None``) metric sets because the operators
-have not run yet.
+returns an empty list or entries whose values are ``None`` / ``0``.
+
+.. note::
+
+   **display() does not populate metrics.**
+   When a DataFrame is displayed in a notebook (e.g. via ``display(df)`` or
+   automatic ``repr`` output), DataFusion runs a *limited* internal execution
+   to fetch preview rows.  This internal execution does **not** cache the
+   physical plan used, so :py:meth:`~datafusion.ExecutionPlan.collect_metrics`
+   will not reflect the display execution.  To access metrics you must call
+   one of the terminal operations listed above.
+
+If you call :py:meth:`~datafusion.DataFrame.collect` (or another terminal
+operation) multiple times on the same DataFrame, each call creates a fresh
+physical plan.  Metrics from :py:meth:`~datafusion.DataFrame.execution_plan`
+always reflect the **most recent** execution.
 
 Reading the Physical Plan Tree
 --------------------------------
@@ -72,6 +88,27 @@ The ``operator_name`` string returned by
 the node, for example ``"FilterExec: column1@0 > 1"``. This is the same string
 you would see when calling ``plan.display()``.
 
+Aggregated vs Per-Partition Metrics
+------------------------------------
+
+DataFusion executes each operator across one or more **partitions** in
+parallel.  The :py:class:`~datafusion.MetricsSet` convenience properties
+(``output_rows``, ``elapsed_compute``, etc.) automatically **sum** the named
+metric across all partitions, giving a single aggregate value.
+
+To inspect individual partitions — for example to detect data skew where one
+partition processes far more rows than others — iterate over the raw
+:py:class:`~datafusion.Metric` objects:
+
+.. code-block:: python
+
+    for metric in metrics_set.metrics():
+        print(f"  partition={metric.partition}  {metric.name}={metric.value}")
+
+The ``partition`` property is a 0-based index (``0``, ``1``, …) identifying
+the partition the metric was recorded for.  It is ``None`` for metrics that
+apply globally (not tied to a specific partition).
+
 Available Metrics
 -----------------
 
@@ -87,15 +124,19 @@ The following metrics are directly accessible as properties on
    * - ``output_rows``
      - Number of rows emitted by the operator (summed across partitions).
    * - ``elapsed_compute``
-     - CPU time in nanoseconds spent inside the operator's execute loop
-       (summed across partitions).
+     - CPU time **in nanoseconds** spent inside the operator's compute
+       loop, excluding I/O wait.  Useful for identifying which
+       operators are most expensive (summed across partitions).
    * - ``spill_count``
-     - Number of spill-to-disk events due to memory pressure (summed across
+     - Number of spill-to-disk events triggered by memory pressure.  This is
+       a unitless count of events, not a measure of data volume (summed across
        partitions).
    * - ``spilled_bytes``
-     - Total bytes written to disk during spills (summed across partitions).
+     - Total bytes written to disk during spill events (summed across
+       partitions).
    * - ``spilled_rows``
-     - Total rows written to disk during spills (summed across partitions).
+     - Total rows written to disk during spill events (summed across
+       partitions).
 
 Any metric not listed above can be accessed via
 :py:meth:`~datafusion.MetricsSet.sum_by_name`, or by iterating over the raw
@@ -106,17 +147,24 @@ Labels
 ------
 
 A :py:class:`~datafusion.Metric` may carry *labels*: key/value pairs that
-provide additional context. For example, some operators tag their output
-metrics with an ``output_type`` label to distinguish between intermediate and
-final output:
+provide additional context.  Labels are operator-specific; most metrics have
+an empty label dict.
+
+Some operators tag their metrics with labels to distinguish variants.  For
+example, an ``AggregateExec`` may record separate ``output_rows`` metrics
+for intermediate and final output:
 
 .. code-block:: python
 
     for metric in metrics_set.metrics():
         print(metric.name, metric.labels())
     # output_rows  {'output_type': 'final'}
+    # output_rows  {'output_type': 'intermediate'}
 
-Labels are operator-specific; most metrics have no labels.
+When summing by name (via :py:attr:`~datafusion.MetricsSet.output_rows` or
+:py:meth:`~datafusion.MetricsSet.sum_by_name`), **all** metrics with that
+name are summed regardless of labels.  To filter by label, iterate over the
+raw :py:class:`~datafusion.Metric` objects directly.
 
 End-to-End Example
 ------------------
diff --git a/python/datafusion/plan.py b/python/datafusion/plan.py
@@ -287,20 +287,23 @@ def name(self) -> str:
         return self._raw.name
 
     @property
-    def value(self) -> int | None:
-        """The numeric value of this metric, or ``None`` when not representable.
-
-        ``None`` is returned for metric types whose value has not yet been set
-        (e.g. ``StartTimestamp`` / ``EndTimestamp`` before the operator runs)
-        and for any metric variant whose value cannot be expressed as an integer.
-        Timestamp metrics, when available, are returned as nanoseconds since the
-        Unix epoch.
+    def value(self) -> int | datetime.datetime | None:
+        """The value of this metric.
+
+        Returns an ``int`` for counters, gauges, and time-based metrics
+        (nanoseconds), a :py:class:`~datetime.datetime` (UTC) for
+        ``start_timestamp`` / ``end_timestamp`` metrics, or ``None``
+        when the value has not been set or is not representable.
         """
         return self._raw.value
 
     @property
     def value_as_datetime(self) -> datetime.datetime | None:
-        """The value as a UTC datetime for timestamp metrics, or ``None``."""
+        """The value as a UTC :py:class:`~datetime.datetime` for timestamp metrics.
+
+        Returns ``None`` for all non-timestamp metrics and for timestamp
+        metrics whose value has not been set (e.g. before execution).
+        """
         return self._raw.value_as_datetime()
 
     @property
diff --git a/python/tests/test_plans.py b/python/tests/test_plans.py

Original file line number	Diff line number	Diff line change
`@@ -97,7 +97,6 @@ impl PyExecutionPlan {`
`97`	`97`	`Ok(Self::new(plan))`
`98`	`98`	`}`
`99`	`99`
`100`		`- /// Returns metrics for this plan node after execution, or None if unavailable.`
`101`	`100`	`pub fn metrics(&self) -> Option<PyMetricsSet> {`
`102`	`101`	`self.plan.metrics().map(PyMetricsSet::new)`
`103`	`102`	`}`