@@ -15,8 +15,9 @@ use datafusion_common::GetExt;
1515use datafusion_common:: Result as DFResult ;
1616use datafusion_common:: ScalarValue as DFScalarValue ;
1717use datafusion_common:: Statistics ;
18+ use datafusion_common:: config:: ConfigExtension ;
1819use datafusion_common:: config:: ConfigField ;
19- use datafusion_common:: config_namespace ;
20+ use datafusion_common:: extensions_options ;
2021use datafusion_common:: internal_datafusion_err;
2122use datafusion_common:: not_impl_err;
2223use datafusion_common:: parsers:: CompressionTypeVariant ;
@@ -132,28 +133,62 @@ impl Debug for VortexFormat {
132133 }
133134}
134135
135- config_namespace ! {
136+ extensions_options ! {
136137 /// Options to configure [`VortexFormat`] and [`VortexSource`].
137138 ///
138- /// These options are usually set on a [`VortexFormatFactory`] and inherited
139- /// by the `VortexFormat` / `VortexSource` instances created for individual
140- /// tables.
139+ /// The API follows DataFusion's built-in Parquet and JSON format factories:
140+ /// a format factory may carry customized defaults, the session may carry
141+ /// format defaults, and `CREATE EXTERNAL TABLE ... OPTIONS(...)` can
142+ /// override individual fields for one table.
143+ ///
144+ /// [`FileFormatFactory::create`] builds the `VortexTableOptions` copied into
145+ /// each [`VortexFormat`] as follows:
146+ ///
147+ /// 1. If the factory has explicit options from
148+ /// [`VortexFormatFactory::with_options`] or
149+ /// [`VortexFormatFactory::new_with_options`], start from that complete
150+ /// `VortexTableOptions` value. This matches
151+ /// [`ParquetFormatFactory::new_with_options`] and
152+ /// [`JsonFormatFactory::new_with_options`]: factory options replace
153+ /// session defaults; they are not merged with them field-by-field.
154+ /// 2. If the factory does not have explicit options, read the session's
155+ /// `vortex` extension at the time `create` is called. This is the value
156+ /// changed by `SET vortex.<option> = ...`.
157+ /// 3. If the session has no `vortex` extension, start from
158+ /// `VortexTableOptions::default()`.
159+ /// 4. Apply table `OPTIONS(...)` last. Each option overwrites only its
160+ /// matching field, so per-table settings can override either the factory
161+ /// options or the session/default value.
162+ ///
163+ /// In SQL, session settings use the `vortex.` prefix. Table options use the
164+ /// field names directly, the same style as Parquet or JSON table options:
165+ ///
166+ /// ```text
167+ /// SET vortex.predicate_pushdown = false;
168+ ///
169+ /// CREATE EXTERNAL TABLE t (x BIGINT)
170+ /// STORED AS vortex
171+ /// LOCATION 's3://bucket/path/'
172+ /// OPTIONS(predicate_pushdown 'true');
173+ /// ```
141174 ///
142175 /// # Example
143176 ///
144177 /// ```rust
145178 /// use vortex_datafusion::{VortexFormatFactory, VortexTableOptions};
146179 ///
147- /// let factory = VortexFormatFactory::new().with_options(VortexTableOptions {
148- /// projection_pushdown: true,
149- /// predicate_pushdown: true,
150- /// scan_concurrency: Some(8),
151- /// ..Default::default()
152- /// } );
180+ /// let mut options = VortexTableOptions::default();
181+ /// options.predicate_pushdown = true;
182+ /// options.projection_pushdown = true;
183+ /// options. scan_concurrency = Some(8);
184+ ///
185+ /// let factory = VortexFormatFactory::new().with_options(options );
153186 /// # let _ = factory;
154187 /// ```
155188 ///
156189 /// [`SessionConfig`]: https://docs.rs/datafusion/latest/datafusion/prelude/struct.SessionConfig.html
190+ /// [`ParquetFormatFactory::new_with_options`]: https://docs.rs/datafusion/latest/datafusion/datasource/file_format/parquet/struct.ParquetFormatFactory.html#method.new_with_options
191+ /// [`JsonFormatFactory::new_with_options`]: https://docs.rs/datafusion/latest/datafusion/datasource/file_format/json/struct.JsonFormatFactory.html#method.new_with_options
157192 pub struct VortexTableOptions {
158193 /// The number of bytes to read when parsing a file footer.
159194 ///
@@ -165,33 +200,46 @@ config_namespace! {
165200 /// When enabled, projection expressions may be partially evaluated during
166201 /// the scan. When disabled, Vortex reads only the referenced columns and
167202 /// all expressions are evaluated after the scan.
203+ ///
204+ /// Disabled by default.
168205 pub projection_pushdown: bool , default = false
169206 /// Whether to enable predicate pushdown into the underlying Vortex scan.
170207 ///
171208 /// When enabled, supported filters are evaluated during the scan. When
172209 /// disabled, DataFusion evaluates filters after the scan, while
173210 /// `VortexSource` can still use the full predicate for file pruning.
211+ ///
212+ /// Enabled by default.
174213 pub predicate_pushdown: bool , default = true
175214 /// The intra-partition scan concurrency, controlling the number of row splits to process
176215 /// concurrently per-thread within each file.
177216 ///
178217 /// This does not affect the overall parallelism
179218 /// across partitions, which is controlled by DataFusion's execution configuration.
219+ ///
220+ /// Leave as `None` to use Vortex's scan default. Override per session
221+ /// with `SET vortex.scan_concurrency = <n>`, or per table with
222+ /// `OPTIONS(scan_concurrency '<n>')`.
180223 pub scan_concurrency: Option <usize >, default = None
181224 }
182225}
183226
184- impl Eq for VortexTableOptions { }
227+ impl ConfigExtension for VortexTableOptions {
228+ const PREFIX : & ' static str = "vortex" ;
229+ }
185230
186231/// Registration entry point for the file-backed Vortex integration.
187232///
188233/// `VortexFormatFactory` is the type most applications use. Register it with a
189234/// DataFusion session, and DataFusion will create [`VortexFormat`] values for
190235/// `CREATE EXTERNAL TABLE`, [`ListingTable`], and URL-table scans.
191236///
192- /// The factory stores a [`VortexSession`] and default [`VortexTableOptions`].
193- /// Those defaults are copied into the formats and sources created for each
194- /// table.
237+ /// The factory stores a [`VortexSession`] and optional factory-level
238+ /// [`VortexTableOptions`]. When options are set on the factory they act like
239+ /// customized format defaults, matching DataFusion's Parquet and JSON factory
240+ /// APIs. Otherwise, `VortexFormatFactory::create` uses the session's `vortex`
241+ /// options. In both cases, table `OPTIONS(...)` are applied last for the table
242+ /// being created.
195243///
196244/// # Example
197245///
@@ -203,11 +251,11 @@ impl Eq for VortexTableOptions {}
203251/// use datafusion_common::GetExt;
204252/// use vortex_datafusion::{VortexFormatFactory, VortexTableOptions};
205253///
206- /// let factory = Arc::new(VortexFormatFactory::new().with_options(VortexTableOptions {
207- /// projection_pushdown: true,
208- /// predicate_pushdown: true,
209- /// ..Default::default()
210- /// } ));
254+ /// let mut options = VortexTableOptions::default();
255+ /// options.predicate_pushdown = true;
256+ /// options.projection_pushdown = true;
257+ ///
258+ /// let factory = Arc::new(VortexFormatFactory::new().with_options(options ));
211259///
212260/// let mut state_builder = SessionStateBuilder::new()
213261/// .with_default_features()
@@ -235,7 +283,12 @@ impl GetExt for VortexFormatFactory {
235283}
236284
237285impl VortexFormatFactory {
238- /// Creates a factory with a default [`VortexSession`] and default options.
286+ /// Creates a factory with a default [`VortexSession`] and no factory-level
287+ /// options.
288+ ///
289+ /// Formats created by this factory start from the session's `vortex`
290+ /// options, or from [`VortexTableOptions::default`] if the session does not
291+ /// contain them. Table-level `OPTIONS(...)` are still applied last.
239292 #[ expect(
240293 clippy:: new_without_default,
241294 reason = "FormatFactory defines `default` method, so having `Default` implementation is confusing"
@@ -247,33 +300,37 @@ impl VortexFormatFactory {
247300 }
248301 }
249302
250- /// Creates a factory with an explicit session and default options.
303+ /// Creates a factory with an explicit session and factory-level options.
251304 ///
252- /// The supplied options become the baseline for every [`VortexFormat`]
253- /// created by this factory. DataFusion may still override them with
254- /// table-level options passed into [`FileFormatFactory::create`].
305+ /// The supplied options become the complete starting value for every
306+ /// [`VortexFormat`] created by this factory. Session `SET vortex.*` values
307+ /// are ignored for these formats, matching DataFusion's built-in
308+ /// `new_with_options` factories. Table-level `OPTIONS(...)` are still
309+ /// applied last.
255310 pub fn new_with_options ( session : VortexSession , options : VortexTableOptions ) -> Self {
256311 Self {
257312 session,
258313 options : Some ( options) ,
259314 }
260315 }
261316
262- /// Overrides the default options for this factory .
317+ /// Sets factory-level options.
263318 ///
264- /// This is the usual way to turn on features such as projection pushdown for
265- /// every table created through the factory.
319+ /// This is the usual way to customize Vortex defaults for every table
320+ /// created through the factory. These options replace, rather than merge
321+ /// with, session `SET vortex.*` values. Table-level `OPTIONS(...)` are still
322+ /// applied last.
266323 ///
267324 /// # Example
268325 ///
269326 /// ```rust
270327 /// use vortex_datafusion::{VortexFormatFactory, VortexTableOptions};
271328 ///
272- /// let factory = VortexFormatFactory::new().with_options(VortexTableOptions {
273- /// projection_pushdown: true,
274- /// predicate_pushdown: true,
275- /// ..Default::default()
276- /// } );
329+ /// let mut options = VortexTableOptions::default();
330+ /// options.predicate_pushdown = true;
331+ /// options.projection_pushdown = true;
332+ ///
333+ /// let factory = VortexFormatFactory::new().with_options(options );
277334 /// # let _ = factory;
278335 /// ```
279336 pub fn with_options ( mut self , options : VortexTableOptions ) -> Self {
@@ -286,13 +343,32 @@ impl FileFormatFactory for VortexFormatFactory {
286343 #[ expect( clippy:: disallowed_types, reason = "required by trait signature" ) ]
287344 fn create (
288345 & self ,
289- _state : & dyn Session ,
346+ state : & dyn Session ,
290347 format_options : & std:: collections:: HashMap < String , String > ,
291348 ) -> DFResult < Arc < dyn FileFormat > > {
292- let mut opts = self . options . clone ( ) . unwrap_or_default ( ) ;
349+ // This mirrors DataFusion's Parquet/JSON file-format factories:
350+ //
351+ // 1. Factory options are a complete customized default when present.
352+ // 2. Without factory options, use the session's `vortex` extension
353+ // (`SET vortex.* = ...`), falling back to built-in defaults.
354+ // 3. Table-level `CREATE EXTERNAL TABLE ... OPTIONS(...)` values apply
355+ // last. DataFusion prefixes file-format options with `format.`
356+ // before passing them to this factory; SQL users write the field
357+ // name directly, e.g. `OPTIONS(predicate_pushdown 'false')`.
358+ let mut opts = self
359+ . options
360+ . clone ( )
361+ . or_else ( || {
362+ state
363+ . config_options ( )
364+ . extensions
365+ . get :: < VortexTableOptions > ( )
366+ . cloned ( )
367+ } )
368+ . unwrap_or_default ( ) ;
293369 for ( key, value) in format_options {
294370 if let Some ( key) = key. strip_prefix ( "format." ) {
295- opts . set ( key, value) ?;
371+ ConfigField :: set ( & mut opts , key, value) ?;
296372 } else {
297373 tracing:: trace!( "Ignoring option '{key}'" ) ;
298374 }
@@ -698,7 +774,7 @@ mod tests {
698774 #[ test]
699775 fn format_plumbs_footer_initial_read_size ( ) {
700776 let mut opts = VortexTableOptions :: default ( ) ;
701- opts . set ( "footer_initial_read_size_bytes" , "12345" ) . unwrap ( ) ;
777+ ConfigField :: set ( & mut opts , "footer_initial_read_size_bytes" , "12345" ) . unwrap ( ) ;
702778
703779 let format = VortexFormat :: new_with_options ( VortexSession :: default ( ) , opts) ;
704780 assert_eq ! ( format. options( ) . footer_initial_read_size_bytes, 12345 ) ;
@@ -720,7 +796,12 @@ mod tests {
720796 . downcast_ref :: < VortexSource > ( )
721797 . ok_or_else ( || anyhow:: anyhow!( "expected VortexSource" ) ) ?;
722798
723- assert_eq ! ( source. options( ) , & opts) ;
799+ assert_eq ! (
800+ source. options( ) . projection_pushdown,
801+ opts. projection_pushdown
802+ ) ;
803+ assert_eq ! ( source. options( ) . predicate_pushdown, opts. predicate_pushdown) ;
804+ assert_eq ! ( source. options( ) . scan_concurrency, opts. scan_concurrency) ;
724805 Ok ( ( ) )
725806 }
726807}
0 commit comments