Skip to content

Commit e049019

Browse files
committed
Optimize metric label cloning
1 parent c8b784a commit e049019

3 files changed

Lines changed: 99 additions & 50 deletions

File tree

datafusion/datasource-parquet/src/metrics.rs

Lines changed: 29 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616
// under the License.
1717

1818
use datafusion_physical_plan::metrics::{
19-
Count, ExecutionPlanMetricsSet, Gauge, MetricBuilder, MetricCategory, MetricType,
20-
PruningMetrics, RatioMergeStrategy, RatioMetrics, Time,
19+
Count, ExecutionPlanMetricsSet, Gauge, Label, MetricBuilder, MetricCategory,
20+
MetricType, PruningMetrics, RatioMergeStrategy, RatioMetrics, Time,
2121
};
2222

2323
/// Stores metrics about the parquet execution for a particular parquet file.
@@ -100,46 +100,44 @@ impl ParquetFileMetrics {
100100
filename: &str,
101101
metrics: &ExecutionPlanMetricsSet,
102102
) -> Self {
103+
// Share the filename label across all per-file metrics to avoid
104+
// allocating the same filename string for each metric.
105+
let filename_label = Label::new("filename", filename.to_string());
106+
let builder = || MetricBuilder::new(metrics).with_label(filename_label.clone());
107+
103108
// -----------------------
104109
// 'summary' level metrics
105110
// -----------------------
106-
let row_groups_pruned_bloom_filter = MetricBuilder::new(metrics)
107-
.with_new_label("filename", filename.to_string())
111+
let row_groups_pruned_bloom_filter = builder()
108112
.with_type(MetricType::Summary)
109113
.pruning_metrics("row_groups_pruned_bloom_filter", partition);
110114

111-
let limit_pruned_row_groups = MetricBuilder::new(metrics)
112-
.with_new_label("filename", filename.to_string())
115+
let limit_pruned_row_groups = builder()
113116
.with_type(MetricType::Summary)
114117
.pruning_metrics("limit_pruned_row_groups", partition);
115118

116-
let row_groups_pruned_statistics = MetricBuilder::new(metrics)
117-
.with_new_label("filename", filename.to_string())
119+
let row_groups_pruned_statistics = builder()
118120
.with_type(MetricType::Summary)
119121
.pruning_metrics("row_groups_pruned_statistics", partition);
120122

121-
let page_index_pages_pruned = MetricBuilder::new(metrics)
122-
.with_new_label("filename", filename.to_string())
123+
let page_index_pages_pruned = builder()
123124
.with_type(MetricType::Summary)
124125
.pruning_metrics("page_index_pages_pruned", partition);
125126

126-
let bytes_scanned = MetricBuilder::new(metrics)
127-
.with_new_label("filename", filename.to_string())
127+
let bytes_scanned = builder()
128128
.with_type(MetricType::Summary)
129129
.with_category(MetricCategory::Bytes)
130130
.counter("bytes_scanned", partition);
131131

132-
let metadata_load_time = MetricBuilder::new(metrics)
133-
.with_new_label("filename", filename.to_string())
132+
let metadata_load_time = builder()
134133
.with_type(MetricType::Summary)
135134
.subset_time("metadata_load_time", partition);
136135

137136
let files_ranges_pruned_statistics = MetricBuilder::new(metrics)
138137
.with_type(MetricType::Summary)
139138
.pruning_metrics("files_ranges_pruned_statistics", partition);
140139

141-
let scan_efficiency_ratio = MetricBuilder::new(metrics)
142-
.with_new_label("filename", filename.to_string())
140+
let scan_efficiency_ratio = builder()
143141
.with_type(MetricType::Summary)
144142
.ratio_metrics_with_strategy(
145143
"scan_efficiency_ratio",
@@ -150,45 +148,35 @@ impl ParquetFileMetrics {
150148
// -----------------------
151149
// 'dev' level metrics
152150
// -----------------------
153-
let predicate_evaluation_errors = MetricBuilder::new(metrics)
154-
.with_new_label("filename", filename.to_string())
151+
let predicate_evaluation_errors = builder()
155152
.with_category(MetricCategory::Rows)
156153
.counter("predicate_evaluation_errors", partition);
157154

158-
let pushdown_rows_pruned = MetricBuilder::new(metrics)
159-
.with_new_label("filename", filename.to_string())
155+
let pushdown_rows_pruned = builder()
160156
.with_category(MetricCategory::Rows)
161157
.counter("pushdown_rows_pruned", partition);
162-
let pushdown_rows_matched = MetricBuilder::new(metrics)
163-
.with_new_label("filename", filename.to_string())
158+
let pushdown_rows_matched = builder()
164159
.with_category(MetricCategory::Rows)
165160
.counter("pushdown_rows_matched", partition);
166161

167-
let row_pushdown_eval_time = MetricBuilder::new(metrics)
168-
.with_new_label("filename", filename.to_string())
169-
.subset_time("row_pushdown_eval_time", partition);
170-
let statistics_eval_time = MetricBuilder::new(metrics)
171-
.with_new_label("filename", filename.to_string())
172-
.subset_time("statistics_eval_time", partition);
173-
let bloom_filter_eval_time = MetricBuilder::new(metrics)
174-
.with_new_label("filename", filename.to_string())
175-
.subset_time("bloom_filter_eval_time", partition);
162+
let row_pushdown_eval_time =
163+
builder().subset_time("row_pushdown_eval_time", partition);
164+
let statistics_eval_time =
165+
builder().subset_time("statistics_eval_time", partition);
166+
let bloom_filter_eval_time =
167+
builder().subset_time("bloom_filter_eval_time", partition);
176168

177-
let page_index_eval_time = MetricBuilder::new(metrics)
178-
.with_new_label("filename", filename.to_string())
179-
.subset_time("page_index_eval_time", partition);
169+
let page_index_eval_time =
170+
builder().subset_time("page_index_eval_time", partition);
180171

181-
let page_index_rows_pruned = MetricBuilder::new(metrics)
182-
.with_new_label("filename", filename.to_string())
183-
.pruning_metrics("page_index_rows_pruned", partition);
172+
let page_index_rows_pruned =
173+
builder().pruning_metrics("page_index_rows_pruned", partition);
184174

185-
let predicate_cache_inner_records = MetricBuilder::new(metrics)
186-
.with_new_label("filename", filename.to_string())
175+
let predicate_cache_inner_records = builder()
187176
.with_category(MetricCategory::Rows)
188177
.gauge("predicate_cache_inner_records", partition);
189178

190-
let predicate_cache_records = MetricBuilder::new(metrics)
191-
.with_new_label("filename", filename.to_string())
179+
let predicate_cache_records = builder()
192180
.with_category(MetricCategory::Rows)
193181
.gauge("predicate_cache_records", partition);
194182

datafusion/physical-expr-common/src/metrics/builder.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ use super::{
3131
/// Structure for constructing metrics, counters, timers, etc.
3232
///
3333
/// Note the use of `Cow<..>` is to avoid allocations in the common
34-
/// case of constant strings
34+
/// case of constant strings. Dynamically created label strings are shared when
35+
/// [`Label`] values are cloned.
3536
///
3637
/// ```rust
3738
/// use datafusion_physical_expr_common::metrics::*;

datafusion/physical-expr-common/src/metrics/mod.rs

Lines changed: 68 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ use parking_lot::Mutex;
3030
use std::{
3131
borrow::Cow,
3232
fmt::{self, Debug, Display},
33+
hash::{Hash, Hasher},
3334
sync::Arc,
3435
vec::IntoIter,
3536
};
@@ -519,12 +520,14 @@ impl From<MetricsSet> for ExecutionPlanMetricsSet {
519520
/// telemetry]<https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/data-model.md>,
520521
/// etc.
521522
///
522-
/// As the name and value are expected to mostly be constant strings,
523-
/// use a [`Cow`] to avoid copying / allocations in this common case.
523+
/// As the name and value are expected to often be constant strings, borrowed
524+
/// static strings avoid allocations in that common case. Dynamic strings are
525+
/// stored behind [`Arc<str>`] so cloning labels does not copy the underlying
526+
/// string data.
524527
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
525528
pub struct Label {
526-
name: Cow<'static, str>,
527-
value: Cow<'static, str>,
529+
name: LabelValue,
530+
value: LabelValue,
528531
}
529532

530533
impl Label {
@@ -533,19 +536,19 @@ impl Label {
533536
name: impl Into<Cow<'static, str>>,
534537
value: impl Into<Cow<'static, str>>,
535538
) -> Self {
536-
let name = name.into();
537-
let value = value.into();
539+
let name = LabelValue::from(name.into());
540+
let value = LabelValue::from(value.into());
538541
Self { name, value }
539542
}
540543

541544
/// Returns the name of this label
542545
pub fn name(&self) -> &str {
543-
self.name.as_ref()
546+
self.name.as_str()
544547
}
545548

546549
/// Returns the value of this label
547550
pub fn value(&self) -> &str {
548-
self.value.as_ref()
551+
self.value.as_str()
549552
}
550553
}
551554

@@ -555,6 +558,54 @@ impl Display for Label {
555558
}
556559
}
557560

561+
#[derive(Clone, Eq)]
562+
enum LabelValue {
563+
Static(&'static str),
564+
Shared(Arc<str>),
565+
}
566+
567+
impl LabelValue {
568+
fn as_str(&self) -> &str {
569+
match self {
570+
Self::Static(value) => value,
571+
Self::Shared(value) => value.as_ref(),
572+
}
573+
}
574+
}
575+
576+
impl From<Cow<'static, str>> for LabelValue {
577+
fn from(value: Cow<'static, str>) -> Self {
578+
match value {
579+
Cow::Borrowed(value) => Self::Static(value),
580+
Cow::Owned(value) => Self::Shared(Arc::from(value)),
581+
}
582+
}
583+
}
584+
585+
impl PartialEq for LabelValue {
586+
fn eq(&self, other: &Self) -> bool {
587+
self.as_str() == other.as_str()
588+
}
589+
}
590+
591+
impl Hash for LabelValue {
592+
fn hash<H: Hasher>(&self, state: &mut H) {
593+
self.as_str().hash(state);
594+
}
595+
}
596+
597+
impl Debug for LabelValue {
598+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
599+
Debug::fmt(self.as_str(), f)
600+
}
601+
}
602+
603+
impl Display for LabelValue {
604+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
605+
Display::fmt(self.as_str(), f)
606+
}
607+
}
608+
558609
#[cfg(test)]
559610
mod tests {
560611
use std::time::Duration;
@@ -609,6 +660,15 @@ mod tests {
609660
assert_eq!("output_rows{partition=2, foo=bar}=66", metric.to_string())
610661
}
611662

663+
#[test]
664+
fn test_label_owned_and_borrowed_values_are_equal() {
665+
let borrowed = Label::new("foo", "bar");
666+
let owned = Label::new("foo".to_string(), "bar".to_string());
667+
668+
assert_eq!(borrowed, owned);
669+
assert_eq!(borrowed.to_string(), owned.to_string());
670+
}
671+
612672
#[test]
613673
fn test_output_rows() {
614674
let metrics = ExecutionPlanMetricsSet::new();

0 commit comments

Comments
 (0)