@@ -23,14 +23,15 @@ use std::sync::Arc;
2323
2424use crate :: DefaultParquetFileReaderFactory ;
2525use crate :: ParquetFileReaderFactory ;
26+ use crate :: opener:: ParquetMorselizer ;
2627use crate :: opener:: build_pruning_predicates;
27- use crate :: opener:: { ParquetMorselizer , ParquetOpener } ;
2828use crate :: row_filter:: can_expr_be_pushed_down_with_schemas;
2929use datafusion_common:: config:: ConfigOptions ;
3030#[ cfg( feature = "parquet_encryption" ) ]
3131use datafusion_common:: config:: EncryptionFactoryOptions ;
3232use datafusion_datasource:: as_file_source;
3333use datafusion_datasource:: file_stream:: FileOpener ;
34+ use datafusion_datasource:: morsel:: Morselizer ;
3435
3536use arrow:: datatypes:: TimeUnit ;
3637use datafusion_common:: DataFusionError ;
@@ -246,12 +247,12 @@ use parquet::encryption::decrypt::FileDecryptionProperties;
246247/// # Execution Overview
247248///
248249/// * Step 1: `DataSourceExec::execute` is called, returning a `FileStream`
249- /// configured to open parquet files with a `ParquetOpener `.
250+ /// configured to morselize parquet files with a `ParquetMorselizer`.
250251///
251- /// * Step 2: When the stream is polled, the `ParquetOpener ` is called to open
252- /// the file.
252+ /// * Step 2: When the stream is polled, the `ParquetMorselizer` is called to
253+ /// plan the file.
253254///
254- /// * Step 3: The `ParquetOpener ` gets the [`ParquetMetaData`] (file metadata)
255+ /// * Step 3: The `ParquetMorselizer` gets the [`ParquetMetaData`] (file metadata)
255256/// via [`ParquetFileReaderFactory`], creating a `ParquetAccessPlan` by
256257/// applying predicates to metadata. The plan and projections are used to
257258/// determine what pages must be read.
@@ -511,11 +512,22 @@ impl From<ParquetSource> for Arc<dyn FileSource> {
511512
512513impl FileSource for ParquetSource {
513514 fn create_file_opener (
515+ & self ,
516+ _object_store : Arc < dyn ObjectStore > ,
517+ _base_config : & FileScanConfig ,
518+ _partition : usize ,
519+ ) -> datafusion_common:: Result < Arc < dyn FileOpener > > {
520+ datafusion_common:: internal_err!(
521+ "ParquetSource::create_file_opener called but it supports the Morsel API"
522+ )
523+ }
524+
525+ fn create_morselizer (
514526 & self ,
515527 object_store : Arc < dyn ObjectStore > ,
516528 base_config : & FileScanConfig ,
517529 partition : usize ,
518- ) -> datafusion_common:: Result < Arc < dyn FileOpener > > {
530+ ) -> datafusion_common:: Result < Box < dyn Morselizer > > {
519531 let expr_adapter_factory = base_config
520532 . expr_adapter_factory
521533 . clone ( )
@@ -542,37 +554,34 @@ impl FileSource for ParquetSource {
542554 . as_ref ( )
543555 . map ( |time_unit| parse_coerce_int96_string ( time_unit. as_str ( ) ) . unwrap ( ) ) ;
544556
545- let opener = Arc :: new ( ParquetOpener {
546- morselizer : ParquetMorselizer {
547- partition_index : partition,
548- projection : self . projection . clone ( ) ,
549- batch_size : self
550- . batch_size
551- . expect ( "Batch size must set before creating ParquetOpener" ) ,
552- limit : base_config. limit ,
553- preserve_order : base_config. preserve_order ,
554- predicate : self . predicate . clone ( ) ,
555- table_schema : self . table_schema . clone ( ) ,
556- metadata_size_hint : self . metadata_size_hint ,
557- metrics : self . metrics ( ) . clone ( ) ,
558- parquet_file_reader_factory,
559- pushdown_filters : self . pushdown_filters ( ) ,
560- reorder_filters : self . reorder_filters ( ) ,
561- force_filter_selections : self . force_filter_selections ( ) ,
562- enable_page_index : self . enable_page_index ( ) ,
563- enable_bloom_filter : self . bloom_filter_on_read ( ) ,
564- enable_row_group_stats_pruning : self . table_parquet_options . global . pruning ,
565- coerce_int96,
566- #[ cfg( feature = "parquet_encryption" ) ]
567- file_decryption_properties,
568- expr_adapter_factory,
569- #[ cfg( feature = "parquet_encryption" ) ]
570- encryption_factory : self . get_encryption_factory_with_config ( ) ,
571- max_predicate_cache_size : self . max_predicate_cache_size ( ) ,
572- reverse_row_groups : self . reverse_row_groups ,
573- } ,
574- } ) ;
575- Ok ( opener)
557+ Ok ( Box :: new ( ParquetMorselizer {
558+ partition_index : partition,
559+ projection : self . projection . clone ( ) ,
560+ batch_size : self
561+ . batch_size
562+ . expect ( "Batch size must set before creating ParquetMorselizer" ) ,
563+ limit : base_config. limit ,
564+ preserve_order : base_config. preserve_order ,
565+ predicate : self . predicate . clone ( ) ,
566+ table_schema : self . table_schema . clone ( ) ,
567+ metadata_size_hint : self . metadata_size_hint ,
568+ metrics : self . metrics ( ) . clone ( ) ,
569+ parquet_file_reader_factory,
570+ pushdown_filters : self . pushdown_filters ( ) ,
571+ reorder_filters : self . reorder_filters ( ) ,
572+ force_filter_selections : self . force_filter_selections ( ) ,
573+ enable_page_index : self . enable_page_index ( ) ,
574+ enable_bloom_filter : self . bloom_filter_on_read ( ) ,
575+ enable_row_group_stats_pruning : self . table_parquet_options . global . pruning ,
576+ coerce_int96,
577+ #[ cfg( feature = "parquet_encryption" ) ]
578+ file_decryption_properties,
579+ expr_adapter_factory,
580+ #[ cfg( feature = "parquet_encryption" ) ]
581+ encryption_factory : self . get_encryption_factory_with_config ( ) ,
582+ max_predicate_cache_size : self . max_predicate_cache_size ( ) ,
583+ reverse_row_groups : self . reverse_row_groups ,
584+ } ) )
576585 }
577586
578587 fn as_any ( & self ) -> & dyn Any {
0 commit comments