@@ -70,7 +70,53 @@ use crate::convert::TryToDataFusion;
7070
7171const DEFAULT_FOOTER_INITIAL_READ_SIZE_BYTES : usize = MAX_POSTSCRIPT_SIZE as usize + EOF_SIZE ;
7272
73- /// Vortex implementation of a DataFusion [`FileFormat`].
73+ /// DataFusion [`FileFormat`] implementation for `.vortex` files.
74+ ///
75+ /// Most applications do not construct `VortexFormat` directly. Instead, they
76+ /// register [`VortexFormatFactory`] with a [`SessionContext`] and let
77+ /// DataFusion instantiate `VortexFormat` as tables are planned.
78+ ///
79+ /// Construct `VortexFormat` directly when you are wiring a [`ListingTable`] by
80+ /// hand and need to pass a file format into [`ListingOptions`].
81+ ///
82+ /// # Example
83+ ///
84+ /// ```no_run
85+ /// use std::sync::Arc;
86+ ///
87+ /// use datafusion::datasource::listing::ListingOptions;
88+ /// use datafusion::datasource::listing::ListingTable;
89+ /// use datafusion::datasource::listing::ListingTableConfig;
90+ /// use datafusion::datasource::listing::ListingTableUrl;
91+ /// use datafusion::prelude::SessionContext;
92+ /// use tempfile::tempdir;
93+ /// use vortex::VortexSessionDefault;
94+ /// use vortex::session::VortexSession;
95+ /// use vortex_datafusion::VortexFormat;
96+ ///
97+ /// # #[tokio::main]
98+ /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
99+ /// let ctx = SessionContext::new();
100+ /// let dir = tempdir()?;
101+ ///
102+ /// let format = Arc::new(VortexFormat::new(VortexSession::default()));
103+ /// let table_url = ListingTableUrl::parse(dir.path().to_str().unwrap())?;
104+ /// let config = ListingTableConfig::new(table_url)
105+ /// .with_listing_options(
106+ /// ListingOptions::new(format).with_session_config_options(ctx.state().config()),
107+ /// )
108+ /// .infer_schema(&ctx.state())
109+ /// .await?;
110+ ///
111+ /// let table = ListingTable::try_new(config)?;
112+ /// # let _ = table;
113+ /// # Ok(())
114+ /// # }
115+ /// ```
116+ ///
117+ /// [`SessionContext`]: https://docs.rs/datafusion/latest/datafusion/prelude/struct.SessionContext.html
118+ /// [`ListingTable`]: https://docs.rs/datafusion/latest/datafusion/datasource/listing/struct.ListingTable.html
119+ /// [`ListingOptions`]: https://docs.rs/datafusion/latest/datafusion/datasource/listing/struct.ListingOptions.html
74120pub struct VortexFormat {
75121 session : VortexSession ,
76122 opts : VortexTableOptions ,
@@ -85,9 +131,24 @@ impl Debug for VortexFormat {
85131}
86132
87133config_namespace ! {
88- /// Options to configure the [`VortexFormat`].
134+ /// Options to configure [`VortexFormat`] and [`VortexSource `].
89135 ///
90- /// Can be set through a DataFusion [`SessionConfig`].
136+ /// These options are usually set on a [`VortexFormatFactory`] and inherited
137+ /// by the `VortexFormat` / `VortexSource` instances created for individual
138+ /// tables.
139+ ///
140+ /// # Example
141+ ///
142+ /// ```rust
143+ /// use vortex_datafusion::{VortexFormatFactory, VortexTableOptions};
144+ ///
145+ /// let factory = VortexFormatFactory::new().with_options(VortexTableOptions {
146+ /// projection_pushdown: true,
147+ /// scan_concurrency: Some(8),
148+ /// ..Default::default()
149+ /// });
150+ /// # let _ = factory;
151+ /// ```
91152 ///
92153 /// [`SessionConfig`]: https://docs.rs/datafusion/latest/datafusion/prelude/struct.SessionConfig.html
93154 pub struct VortexTableOptions {
@@ -113,7 +174,44 @@ config_namespace! {
113174
114175impl Eq for VortexTableOptions { }
115176
116- /// Minimal factory to create [`VortexFormat`] instances.
177+ /// Registration entry point for the file-backed Vortex integration.
178+ ///
179+ /// `VortexFormatFactory` is the type most applications use. Register it with a
180+ /// DataFusion session, and DataFusion will create [`VortexFormat`] values for
181+ /// `CREATE EXTERNAL TABLE`, [`ListingTable`], and URL-table scans.
182+ ///
183+ /// The factory stores a [`VortexSession`] and default [`VortexTableOptions`].
184+ /// Those defaults are copied into the formats and sources created for each
185+ /// table.
186+ ///
187+ /// # Example
188+ ///
189+ /// ```no_run
190+ /// use std::sync::Arc;
191+ ///
192+ /// use datafusion::datasource::provider::DefaultTableFactory;
193+ /// use datafusion::execution::SessionStateBuilder;
194+ /// use datafusion_common::GetExt;
195+ /// use vortex_datafusion::{VortexFormatFactory, VortexTableOptions};
196+ ///
197+ /// let factory = Arc::new(VortexFormatFactory::new().with_options(VortexTableOptions {
198+ /// projection_pushdown: true,
199+ /// ..Default::default()
200+ /// }));
201+ ///
202+ /// let mut state_builder = SessionStateBuilder::new()
203+ /// .with_default_features()
204+ /// .with_table_factory(
205+ /// factory.get_ext().to_uppercase(),
206+ /// Arc::new(DefaultTableFactory::new()),
207+ /// );
208+ ///
209+ /// if let Some(file_formats) = state_builder.file_formats() {
210+ /// file_formats.push(factory.clone() as _);
211+ /// }
212+ /// ```
213+ ///
214+ /// [`ListingTable`]: https://docs.rs/datafusion/latest/datafusion/datasource/listing/struct.ListingTable.html
117215#[ derive( Debug ) ]
118216pub struct VortexFormatFactory {
119217 session : VortexSession ,
@@ -127,7 +225,7 @@ impl GetExt for VortexFormatFactory {
127225}
128226
129227impl VortexFormatFactory {
130- /// Creates a new instance with a default [`VortexSession`] and default options.
228+ /// Creates a factory with a default [`VortexSession`] and default options.
131229 #[ expect(
132230 clippy:: new_without_default,
133231 reason = "FormatFactory defines `default` method, so having `Default` implementation is confusing"
@@ -139,23 +237,33 @@ impl VortexFormatFactory {
139237 }
140238 }
141239
142- /// Creates a new instance with customized session and default options for all [`VortexFormat`] instances created from this factory .
240+ /// Creates a factory with an explicit session and default options.
143241 ///
144- /// The options can be overridden by table-level configuration pass in [`FileFormatFactory::create`].
242+ /// The supplied options become the baseline for every [`VortexFormat`]
243+ /// created by this factory. DataFusion may still override them with
244+ /// table-level options passed into [`FileFormatFactory::create`].
145245 pub fn new_with_options ( session : VortexSession , options : VortexTableOptions ) -> Self {
146246 Self {
147247 session,
148248 options : Some ( options) ,
149249 }
150250 }
151251
152- /// Override the default options for this factory.
252+ /// Overrides the default options for this factory.
253+ ///
254+ /// This is the usual way to turn on features such as projection pushdown for
255+ /// every table created through the factory.
256+ ///
257+ /// # Example
153258 ///
154- /// For example:
155259 /// ```rust
156260 /// use vortex_datafusion::{VortexFormatFactory, VortexTableOptions};
157261 ///
158- /// let factory = VortexFormatFactory::new().with_options(VortexTableOptions::default());
262+ /// let factory = VortexFormatFactory::new().with_options(VortexTableOptions {
263+ /// projection_pushdown: true,
264+ /// ..Default::default()
265+ /// });
266+ /// # let _ = factory;
159267 /// ```
160268 pub fn with_options ( mut self , options : VortexTableOptions ) -> Self {
161269 self . options = Some ( options) ;
@@ -195,17 +303,23 @@ impl FileFormatFactory for VortexFormatFactory {
195303}
196304
197305impl VortexFormat {
198- /// Create a new instance with default options.
306+ /// Creates a format with default [`VortexTableOptions`].
307+ ///
308+ /// Prefer [`VortexFormatFactory`] when registering with a session. Construct
309+ /// `VortexFormat` directly when building [`ListingOptions`] manually.
310+ ///
311+ /// [`ListingOptions`]: https://docs.rs/datafusion/latest/datafusion/datasource/listing/struct.ListingOptions.html
199312 pub fn new ( session : VortexSession ) -> Self {
200313 Self :: new_with_options ( session, VortexTableOptions :: default ( ) )
201314 }
202315
203- /// Creates a new instance with configured by a [`VortexTableOptions`].
316+ /// Creates a format with explicit [`VortexTableOptions`].
204317 pub fn new_with_options ( session : VortexSession , opts : VortexTableOptions ) -> Self {
205318 Self { session, opts }
206319 }
207320
208- /// Return the format specific configuration
321+ /// Returns the format-specific configuration that will be copied into the
322+ /// [`VortexSource`] created for a scan.
209323 pub fn options ( & self ) -> & VortexTableOptions {
210324 & self . opts
211325 }
0 commit comments