Skip to content

Commit 61c3f8d

Browse files
metrics: update plan_bytes_sent to be a bytes metric (#506)
Instead of printing byte values like `10 GB`, this metric would print `10 B`, where `B` stands for billion. That's confusing because it reads as "10 bytes". This change fixes that by switching the metric from a plain counter to a bytes counter.
1 parent 93a9fc4 commit 61c3f8d

2 files changed

Lines changed: 8 additions & 10 deletions

File tree

docs/source/user-guide/metrics.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ This produces an EXPLAIN ANALYZE that spans the whole cluster — every stage an
6666
runtime metrics, including network-level metrics on the boundaries:
6767

6868
```
69-
┌───── DistributedExec ── Tasks: t0:[p0] plan_bytes_sent_0=8.07 K, plan_send_latency_avg_0=22.63ms, ...
69+
┌───── DistributedExec ── Tasks: t0:[p0] plan_bytes_sent_0=8.07 KB, plan_send_latency_avg_0=22.63ms, ...
7070
│ SortPreservingMergeExec: [count(*)@0 DESC], fetch=5, metrics=[output_rows=5, elapsed_compute=391.83µs, ...]
7171
│ [Stage 2] => NetworkCoalesceExec: output_partitions=32, input_tasks=2, metrics=[elapsed_compute=5.86ms, bytes_transferred=20.1 KB, network_latency_p50=366.00µs, network_latency_p95=603.43µs, ...]
7272
└──────────────────────────────────────────────────

src/coordinator/query_coordinator.rs

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,17 @@ use crate::worker::generated::worker as pb;
1111
use crate::worker::generated::worker::coordinator_to_worker_msg::Inner;
1212
use crate::worker::generated::worker::set_plan_request::WorkUnitFeedDeclaration;
1313
use crate::{
14-
DISTRIBUTED_DATAFUSION_TASK_ID_LABEL, DistributedCodec, DistributedConfig,
15-
DistributedTaskContext, DistributedWorkUnitFeedContext, TaskEstimator, TaskKey,
16-
TaskRoutingContext, get_distributed_channel_resolver, get_distributed_worker_resolver,
14+
BytesCounterMetric, BytesMetricExt, DISTRIBUTED_DATAFUSION_TASK_ID_LABEL, DistributedCodec,
15+
DistributedConfig, DistributedTaskContext, DistributedWorkUnitFeedContext, TaskEstimator,
16+
TaskKey, TaskRoutingContext, get_distributed_channel_resolver, get_distributed_worker_resolver,
1717
};
1818
use datafusion::common::instant::Instant;
1919
use datafusion::common::runtime::JoinSet;
2020
use datafusion::common::tree_node::{Transformed, TreeNodeRecursion};
2121
use datafusion::common::{DataFusionError, exec_datafusion_err};
2222
use datafusion::common::{Result, exec_err};
2323
use datafusion::execution::TaskContext;
24-
use datafusion::physical_expr_common::metrics::{
25-
Count, ExecutionPlanMetricsSet, Label, MetricBuilder,
26-
};
24+
use datafusion::physical_expr_common::metrics::{ExecutionPlanMetricsSet, Label, MetricBuilder};
2725
use datafusion::physical_plan::ExecutionPlan;
2826
use datafusion_proto::physical_plan::AsExecutionPlan;
2927
use datafusion_proto::protobuf::PhysicalPlanNode;
@@ -198,7 +196,7 @@ impl<'a> StageCoordinator<'a> {
198196
})
199197
})?;
200198
metrics.plan_send_latency.record(&start);
201-
metrics.plan_bytes_sent.add(plan_size);
199+
metrics.plan_bytes_sent.add_bytes(plan_size);
202200
let mut worker_to_coordinator_stream = response.into_inner();
203201
while let Some(msg_or_err) = worker_to_coordinator_stream.next().await {
204202
let msg = msg_or_err.map_err(|err| {
@@ -420,7 +418,7 @@ impl Drop for NotifyGuard {
420418
/// Metrics that measure network details about communications between [DistributedExec] and a worker.
421419
#[derive(Clone)]
422420
pub(super) struct CoordinatorToWorkerMetrics {
423-
pub(super) plan_bytes_sent: Count,
421+
pub(super) plan_bytes_sent: BytesCounterMetric,
424422
pub(super) plan_send_latency: Arc<LatencyMetric>,
425423
pub(super) instantiation_time: u64,
426424
}
@@ -431,7 +429,7 @@ impl CoordinatorToWorkerMetrics {
431429
// Metric that measures the total number of bytes of subplans sent.
432430
plan_bytes_sent: MetricBuilder::new(metrics)
433431
.with_label(Label::new(DISTRIBUTED_DATAFUSION_TASK_ID_LABEL, "0"))
434-
.global_counter("plan_bytes_sent"),
432+
.bytes_counter("plan_bytes_sent"),
435433
// Latency statistics about the network calls issued to the workers for feeding subplans.
436434
plan_send_latency: Arc::new(LatencyMetric::new(
437435
"plan_send_latency",

0 commit comments

Comments
 (0)