// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use std::any::Any;
use std::fmt::Formatter;
use std::sync::Arc;
use std::sync::Weak;

use datafusion_common::Result as DFResult;
use datafusion_common::config::ConfigOptions;
use datafusion_datasource::TableSchema;
use datafusion_datasource::file::FileSource;
use datafusion_datasource::file_scan_config::FileScanConfig;
use datafusion_datasource::file_stream::FileOpener;
use datafusion_execution::cache::cache_manager::FileMetadataCache;
use datafusion_physical_expr::PhysicalExprRef;
use datafusion_physical_expr::conjunction;
use datafusion_physical_expr::projection::ProjectionExprs;
use datafusion_physical_expr_adapter::DefaultPhysicalExprAdapterFactory;
use datafusion_physical_expr_common::physical_expr::fmt_sql;
use datafusion_physical_plan::DisplayFormatType;
use datafusion_physical_plan::PhysicalExpr;
use datafusion_physical_plan::filter_pushdown::FilterPushdownPropagation;
use datafusion_physical_plan::filter_pushdown::PushedDown;
use datafusion_physical_plan::filter_pushdown::PushedDownPredicate;
use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet;
use object_store::ObjectStore;
use object_store::path::Path;
use vortex::error::VortexExpect;
use vortex::file::VORTEX_FILE_EXTENSION;
use vortex::layout::LayoutReader;
use vortex::layout::segments::SegmentCacheBuilder;
use vortex::metrics::DefaultMetricsRegistry;
use vortex::metrics::MetricsRegistry;
use vortex::session::VortexSession;
use vortex_utils::aliases::dash_map::DashMap;

use super::opener::VortexOpener;
use crate::VortexTableOptions;
use crate::convert::exprs::DefaultExpressionConvertor;
use crate::convert::exprs::ExpressionConvertor;
use crate::persistent::reader::DefaultVortexReaderFactory;
use crate::persistent::reader::VortexReaderFactory;

/// Execution plan for reading one or more Vortex files, intended to be consumed by [`DataSourceExec`].
///
/// [`DataSourceExec`]: datafusion_datasource::source::DataSourceExec
#[derive(Clone)]
pub struct VortexSource {
    pub(crate) session: VortexSession,
    pub(crate) table_schema: TableSchema,
    pub(crate) projection: ProjectionExprs,
    /// Combined predicate expression containing all filters from DataFusion query planning.
    /// Used with FilePruner to skip files based on statistics and partition values.
    pub(crate) full_predicate: Option<PhysicalExprRef>,
    /// Subset of predicates that can be pushed down into Vortex scan operations.
    /// These are expressions that Vortex can efficiently evaluate during scanning.
    pub(crate) vortex_predicate: Option<PhysicalExprRef>,
    pub(crate) batch_size: Option<usize>,
    _unused_df_metrics: ExecutionPlanMetricsSet,
    /// Shared layout readers; the source only lives as long as one scan.
    ///
    /// Sharing the readers allows us to read every layout from the file only once, even across partitions.
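    /// Entries hold [`Weak`] references, so a reader is dropped once the last scan partition releases it.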
    layout_readers: Arc<DashMap<Path, Weak<dyn LayoutReader>>>,
    expression_convertor: Arc<dyn ExpressionConvertor>,
    pub(crate) vortex_reader_factory: Option<Arc<dyn VortexReaderFactory>>,
    vx_metrics_registry: Arc<dyn MetricsRegistry>,
    file_metadata_cache: Option<Arc<dyn FileMetadataCache>>,
    segment_cache_builder: Option<Arc<dyn SegmentCacheBuilder>>,
    /// Table options, including whether projection pushdown into the underlying Vortex scan is enabled.
    options: VortexTableOptions,
}

impl VortexSource {
    /// Creates a new `VortexSource` with default configuration and a provided [`VortexSession`].
    /// Meant to be used with a [`FileScanConfig`] to scan files with the provided schema.
    ///
    /// Can be configured using the provided methods.
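    ///
    /// # Example
    ///
    /// A minimal sketch, assuming a `TableSchema` and a `VortexSession` are already in
    /// hand; `table_schema` and `session` below are placeholders:
    ///
    /// ```rust,ignore
    /// use std::sync::Arc;
    /// use datafusion_datasource::file::FileSource;
    ///
    /// let source = VortexSource::new(table_schema, session)
    ///     // Evaluate the column projection inside the Vortex scan.
    ///     .with_projection_pushdown(true)
    ///     // Per-file scan concurrency, separate from DataFusion's partitioning.
    ///     .with_scan_concurrency(4);
    /// let source: Arc<dyn FileSource> = Arc::new(source);
    /// ```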
    pub fn new(table_schema: TableSchema, session: VortexSession) -> Self {
        let full_schema = table_schema.table_schema();
        let indices = (0..full_schema.fields().len()).collect::<Vec<_>>();
        let projection = ProjectionExprs::from_indices(&indices, full_schema);

        Self {
            session,
            table_schema,
            projection,
            full_predicate: None,
            vortex_predicate: None,
            batch_size: None,
            _unused_df_metrics: Default::default(),
            layout_readers: Arc::new(DashMap::default()),
            expression_convertor: Arc::new(DefaultExpressionConvertor::default()),
            vortex_reader_factory: None,
            vx_metrics_registry: Arc::new(DefaultMetricsRegistry::default()),
            file_metadata_cache: None,
            segment_cache_builder: None,
            options: VortexTableOptions::default(),
        }
    }

    /// Enable or disable projection pushdown into the underlying Vortex scan.
    pub fn with_projection_pushdown(mut self, enabled: bool) -> Self {
        self.options.projection_pushdown = enabled;
        self
    }

    /// Set an [`ExpressionConvertor`] to control how DataFusion expressions are converted and pushed down.
    pub fn with_expression_convertor(
        mut self,
        expr_convertor: Arc<dyn ExpressionConvertor>,
    ) -> Self {
        self.expression_convertor = expr_convertor;
        self
    }

    /// Set a user-defined factory to create the underlying [`VortexReadAt`].
    ///
    /// [`VortexReadAt`]: vortex::io::VortexReadAt
    pub fn with_vortex_reader_factory(
        mut self,
        vortex_reader_factory: Arc<dyn VortexReaderFactory>,
    ) -> Self {
        self.vortex_reader_factory = Some(vortex_reader_factory);
        self
    }

    /// Returns the [`MetricsRegistry`] attached to this source.
    pub fn metrics_registry(&self) -> &Arc<dyn MetricsRegistry> {
        &self.vx_metrics_registry
    }

    /// Override the file metadata cache used when opening files.
    pub fn with_file_metadata_cache(
        mut self,
        file_metadata_cache: Arc<dyn FileMetadataCache>,
    ) -> Self {
        self.file_metadata_cache = Some(file_metadata_cache);
        self
    }

    /// Sets a [`SegmentCacheBuilder`] to reuse segment bytes across scans of the same files.
    ///
    /// Without a builder, every query re-reads zone map and data segments from object storage.
    /// The builder is invoked once per opened file with that file's
    /// [`FileIdentity`](vortex::layout::segments::FileIdentity); the returned per-file
    /// [`SegmentCache`](vortex::layout::segments::SegmentCache) is wired into the file open
    /// path. Use
    /// [`NamespacedMokaSegmentCacheBuilder`](vortex::layout::segments::NamespacedMokaSegmentCacheBuilder)
    /// for cross-query reuse with a global memory budget, optionally wrapped in
    /// [`InstrumentedSegmentCacheBuilder`](vortex::layout::segments::InstrumentedSegmentCacheBuilder)
    /// for hit/miss metrics.
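    ///
    /// # Example
    ///
    /// A minimal sketch; the builder constructor and the cache-size parameter shown here
    /// are assumptions, not the crate's exact API:
    ///
    /// ```rust,ignore
    /// use std::sync::Arc;
    /// use vortex::layout::segments::NamespacedMokaSegmentCacheBuilder;
    ///
    /// // Hypothetical: one Moka-backed builder shared across queries under a global
    /// // memory budget, so repeated scans of the same files hit cached segments.
    /// let builder = Arc::new(NamespacedMokaSegmentCacheBuilder::new(256 << 20));
    /// let source = source.with_segment_cache_builder(builder);
    /// ```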
    pub fn with_segment_cache_builder(mut self, builder: Arc<dyn SegmentCacheBuilder>) -> Self {
        self.segment_cache_builder = Some(builder);
        self
    }

    /// Sets the per-file Vortex scan concurrency.
    ///
    /// This is separate from DataFusion's partition-level parallelism.
    pub fn with_scan_concurrency(mut self, scan_concurrency: usize) -> Self {
        self.options.scan_concurrency = Some(scan_concurrency);
        self
    }

    /// Returns the table options for this source.
    pub fn options(&self) -> &VortexTableOptions {
        &self.options
    }

    /// Set the table options for this source.
    pub fn with_options(mut self, opts: VortexTableOptions) -> Self {
        self.options = opts;
        self
    }
}

impl FileSource for VortexSource {
    fn create_file_opener(
        &self,
        object_store: Arc<dyn ObjectStore>,
        base_config: &FileScanConfig,
        partition: usize,
    ) -> DFResult<Arc<dyn FileOpener>> {
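        // `batch_size` is supplied via `with_batch_size` before openers are created;
        // hitting this expect indicates a bug in the caller, not a user error.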
        let batch_size = self
            .batch_size
            .vortex_expect("batch_size must be supplied to VortexSource");

        let expr_adapter_factory = base_config
            .expr_adapter_factory
            .clone()
            .unwrap_or_else(|| Arc::new(DefaultPhysicalExprAdapterFactory));

        let vortex_reader_factory = self
            .vortex_reader_factory
            .clone()
            .unwrap_or_else(|| Arc::new(DefaultVortexReaderFactory::new(object_store)));

        let opener = VortexOpener {
            partition,
            session: self.session.clone(),
            vortex_reader_factory,
            projection: self.projection.clone(),
            filter: self.vortex_predicate.clone(),
            file_pruning_predicate: self.full_predicate.clone(),
            expr_adapter_factory,
            table_schema: self.table_schema.clone(),
            batch_size,
            limit: base_config.limit.map(|l| l as u64),
            metrics_registry: self.vx_metrics_registry.clone(),
            layout_readers: self.layout_readers.clone(),
            has_output_ordering: !base_config.output_ordering.is_empty(),
            // Propagate the convertor configured via `with_expression_convertor`.
            expression_convertor: self.expression_convertor.clone(),
            file_metadata_cache: self.file_metadata_cache.clone(),
            segment_cache_builder: self.segment_cache_builder.clone(),
            projection_pushdown: self.options.projection_pushdown,
            scan_concurrency: self.options.scan_concurrency,
        };
        Ok(Arc::new(opener))
    }

    fn as_any(&self) -> &dyn Any {
        self
    }

    fn with_batch_size(&self, batch_size: usize) -> Arc<dyn FileSource> {
        let mut source = self.clone();
        source.batch_size = Some(batch_size);
        Arc::new(source)
    }

    fn filter(&self) -> Option<Arc<dyn PhysicalExpr>> {
        self.vortex_predicate.clone()
    }

    fn metrics(&self) -> &ExecutionPlanMetricsSet {
        &self._unused_df_metrics
    }

    fn file_type(&self) -> &str {
        VORTEX_FILE_EXTENSION
    }

    fn fmt_extra(&self, t: DisplayFormatType, f: &mut Formatter) -> std::fmt::Result {
        match t {
            DisplayFormatType::Default | DisplayFormatType::Verbose => {
                if let Some(ref predicate) = self.vortex_predicate {
                    write!(f, ", predicate: {predicate}")?;
                }
            }
            // Use TreeRender style key=value formatting to display the predicate
            DisplayFormatType::TreeRender => {
                if let Some(ref predicate) = self.vortex_predicate {
                    writeln!(f, "predicate={}", fmt_sql(predicate.as_ref()))?;
                }
            }
        }
        Ok(())
    }

    fn supports_repartitioning(&self) -> bool {
        true
    }

    fn try_pushdown_filters(
        &self,
        filters: Vec<Arc<dyn PhysicalExpr>>,
        _config: &ConfigOptions,
    ) -> DFResult<FilterPushdownPropagation<Arc<dyn FileSource>>> {
        if filters.is_empty() {
            return Ok(FilterPushdownPropagation::with_parent_pushdown_result(
                vec![],
            ));
        }

        let mut source = self.clone();

        // Combine new filters with existing predicate for file pruning.
        // This full predicate is used by FilePruner to eliminate files.
        source.full_predicate = match source.full_predicate {
            Some(predicate) => Some(conjunction(
                std::iter::once(predicate).chain(filters.clone()),
            )),
            None => Some(conjunction(filters.clone())),
        };
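
        // Classify each filter: only expressions the convertor can translate into a
        // Vortex expression are pushed into the scan; the rest stay with DataFusion.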
        let supported_filters = filters
            .into_iter()
            .map(|expr| {
                if self
                    .expression_convertor
                    .can_be_pushed_down(&expr, self.table_schema.file_schema())
                {
                    PushedDownPredicate::supported(expr)
                } else {
                    PushedDownPredicate::unsupported(expr)
                }
            })
            .collect::<Vec<_>>();

        if supported_filters
            .iter()
            .all(|p| matches!(p.discriminant, PushedDown::No))
        {
            return Ok(FilterPushdownPropagation::with_parent_pushdown_result(
                vec![PushedDown::No; supported_filters.len()],
            )
            .with_updated_node(Arc::new(source) as _));
        }

        let supported = supported_filters
            .iter()
            .filter_map(|p| match p.discriminant {
                PushedDown::Yes => Some(&p.predicate),
                PushedDown::No => None,
            })
            .cloned();
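
        // Fold the supported filters into any previously pushed-down predicate so the
        // Vortex scan evaluates a single conjunction.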
        let predicate = match source.vortex_predicate {
            Some(predicate) => conjunction(std::iter::once(predicate).chain(supported)),
            None => conjunction(supported),
        };

        tracing::debug!(%predicate, "Saving predicate");
        source.vortex_predicate = Some(predicate);

        Ok(FilterPushdownPropagation::with_parent_pushdown_result(
            supported_filters.iter().map(|f| f.discriminant).collect(),
        )
        .with_updated_node(Arc::new(source) as _))
    }

    fn try_pushdown_projection(
        &self,
        projection: &ProjectionExprs,
    ) -> DFResult<Option<Arc<dyn FileSource>>> {
        let mut source = self.clone();
        source.projection = self.projection.try_merge(projection)?;
        Ok(Some(Arc::new(source)))
    }

    fn projection(&self) -> Option<&ProjectionExprs> {
        Some(&self.projection)
    }

    fn table_schema(&self) -> &TableSchema {
        &self.table_schema
    }
}