Skip to content

Commit 7631a82

Browse files
author
ShreyeshArangath
committed
address some more comments
1 parent 98d5904 commit 7631a82

File tree

6 files changed

+24
-15
lines changed

6 files changed

+24
-15
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ tokio = { version = "1.50" }
3535
pyo3 = { version = "0.28" }
3636
pyo3-async-runtimes = { version = "0.28" }
3737
pyo3-log = "0.13.3"
38+
chrono = { version = "0.4", default-features = false }
3839
arrow = { version = "58" }
3940
arrow-array = { version = "58" }
4041
arrow-schema = { version = "58" }

crates/core/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ pyo3 = { workspace = true, features = [
4747
] }
4848
pyo3-async-runtimes = { workspace = true, features = ["tokio-runtime"] }
4949
pyo3-log = { workspace = true }
50+
chrono = { workspace = true }
5051
arrow = { workspace = true, features = ["pyarrow"] }
5152
arrow-select = { workspace = true }
5253
datafusion = { workspace = true, features = ["avro", "unicode_expressions"] }

crates/core/src/metrics.rs

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
use std::collections::HashMap;
1919
use std::sync::Arc;
2020

21+
use chrono::{Datelike, Timelike};
2122
use datafusion::physical_plan::metrics::{Metric, MetricValue, MetricsSet, Timestamp};
2223
use pyo3::prelude::*;
2324

@@ -88,18 +89,19 @@ impl PyMetric {
8889
) -> PyResult<Option<Bound<'py, PyAny>>> {
8990
match ts.value() {
9091
Some(dt) => {
91-
let nanos = dt.timestamp_nanos_opt().ok_or_else(|| {
92-
PyErr::new::<pyo3::exceptions::PyOverflowError, _>("timestamp out of range")
93-
})?;
9492
let datetime_mod = py.import("datetime")?;
9593
let datetime_cls = datetime_mod.getattr("datetime")?;
9694
let tz_utc = datetime_mod.getattr("timezone")?.getattr("utc")?;
97-
let secs = nanos / 1_000_000_000;
98-
let micros = (nanos % 1_000_000_000) / 1_000;
99-
let result = datetime_cls.call_method1(
100-
"fromtimestamp",
101-
(secs as f64 + micros as f64 / 1_000_000.0, tz_utc),
102-
)?;
95+
let result = datetime_cls.call1((
96+
dt.year(),
97+
dt.month(),
98+
dt.day(),
99+
dt.hour(),
100+
dt.minute(),
101+
dt.second(),
102+
dt.timestamp_subsec_micros(),
103+
tz_utc,
104+
))?;
103105
Ok(Some(result))
104106
}
105107
None => Ok(None),
@@ -138,6 +140,7 @@ impl PyMetric {
138140
}
139141
}
140142

143+
#[getter]
141144
fn value_as_datetime<'py>(&self, py: Python<'py>) -> PyResult<Option<Bound<'py, PyAny>>> {
142145
match self.metric.value() {
143146
MetricValue::StartTimestamp(ts) | MetricValue::EndTimestamp(ts) => {

docs/source/user-guide/dataframe/execution-metrics.rst

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,12 @@ per-partition :py:class:`~datafusion.Metric` objects via
4747
When Are Metrics Available?
4848
---------------------------
4949

50-
Metrics are populated only **after** the DataFrame has been executed.
51-
Execution is triggered by any of the terminal operations:
50+
Some operators (for example ``DataSourceExec``) eagerly create a
51+
:py:class:`~datafusion.MetricsSet` when the physical plan is built, so
52+
:py:meth:`~datafusion.ExecutionPlan.metrics` may return a set even before any
53+
rows have been processed. However, metric **values** such as ``output_rows``
54+
are only meaningful **after** the DataFrame has been executed via one of the
55+
terminal operations:
5256

5357
- :py:meth:`~datafusion.DataFrame.collect`
5458
- :py:meth:`~datafusion.DataFrame.collect_partitioned`
@@ -57,8 +61,7 @@ Execution is triggered by any of the terminal operations:
5761
- :py:meth:`~datafusion.DataFrame.execute_stream_partitioned`
5862
(metrics are available once all partition streams have been fully consumed)
5963

60-
Calling :py:meth:`~datafusion.ExecutionPlan.collect_metrics` before execution
61-
returns an empty list or entries whose values are ``None`` / ``0``.
64+
Before execution, metric values will be ``0`` or ``None``.
6265

6366
.. note::
6467

python/datafusion/plan.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ def to_proto(self) -> bytes:
157157
return self._raw_plan.to_proto()
158158

159159
def metrics(self) -> MetricsSet | None:
160-
"""Return metrics for this plan node, or None if this node has no MetricsSet.
160+
"""Return metrics for this plan node, or None if this plan has no MetricsSet.
161161
162162
Some operators (e.g. DataSourceExec) eagerly initialize a MetricsSet
163163
when the plan is created, so this may return a set even before
@@ -304,7 +304,7 @@ def value_as_datetime(self) -> datetime.datetime | None:
304304
Returns ``None`` for all non-timestamp metrics and for timestamp
305305
metrics whose value has not been set (e.g. before execution).
306306
"""
307-
return self._raw.value_as_datetime()
307+
return self._raw.value_as_datetime
308308

309309
@property
310310
def partition(self) -> int | None:

0 commit comments

Comments
 (0)