@@ -594,8 +594,6 @@ struct ProjectionWithVirtualColumns {
594594 file_row_number_column_pos : Option < usize > ,
595595}
596596
597- /// Creates a projection expression from raw projection/column ID slices and
598- /// column names.
599597fn extract_projection_expr (
600598 projection_ids : Option < & [ u64 ] > ,
601599 column_ids : & [ u64 ] ,
@@ -610,33 +608,62 @@ fn extract_projection_expr(
610608
611609 let mut file_index_column_pos = None ;
612610 let mut file_row_number_column_pos = None ;
611+ let mut is_star = true ;
612+ let mut real_column_count = 0 ;
613+
614+ // DuckDB uses u64 as column indices but Rust uses usize
615+ for ( column_pos, & column_id) in ids. iter ( ) . enumerate ( ) {
616+ let column_id = if has_projection_ids {
617+ let column_id: usize = column_id. as_ ( ) ;
618+ column_ids[ column_id]
619+ } else {
620+ column_id
621+ } ;
613622
614- #[ expect( clippy:: cast_possible_truncation) ]
615- let names = ids
616- . iter ( )
617- . enumerate ( )
618- . map ( |( column_pos, & column_id) | {
619- let column_id = if has_projection_ids {
620- column_ids[ column_id as usize ]
621- } else {
622- column_id
623- } ;
623+ if column_id == FILE_INDEX_COLUMN_IDX {
624+ file_index_column_pos = Some ( column_pos) ;
625+ continue ;
626+ }
627+ if column_id == FILE_ROW_NUMBER_COLUMN_IDX {
628+ file_row_number_column_pos = Some ( column_pos) ;
629+ continue ;
630+ }
624631
625- if column_id == FILE_INDEX_COLUMN_IDX {
626- file_index_column_pos = Some ( column_pos) ;
627- }
628- if column_id == FILE_ROW_NUMBER_COLUMN_IDX {
629- file_row_number_column_pos = Some ( column_pos) ;
630- }
632+ // In SELECT * DuckDB requests all columns from 0 to column_fields in
633+ // increasing order. After removing virtual columns, compare column_id
634+ // with (0..column_fields.len()) range.
635+ is_star &= column_id == real_column_count;
636+ real_column_count += 1 ;
637+ }
638+ // Duckdb can request less columns than there are in table i.e. [0, 1] with
639+ // 5 columns total.
640+ is_star &= real_column_count == column_fields. len ( ) as u64 ;
631641
632- column_id
633- } )
634- . filter ( |& col_id| !is_virtual_column ( col_id) )
635- . map ( |col_id| Arc :: from ( column_fields[ col_id as usize ] . name . as_str ( ) ) )
636- . collect :: < FieldNames > ( ) ;
642+ let select = if is_star {
643+ root ( )
644+ } else {
645+ let names = ids
646+ . iter ( )
647+ . map ( |& column_id| {
648+ if has_projection_ids {
649+ let column_id: usize = column_id. as_ ( ) ;
650+ column_ids[ column_id]
651+ } else {
652+ column_id
653+ }
654+ } )
655+ . filter ( |& col_id| !is_virtual_column ( col_id) )
656+ . map ( |column_id| {
657+ let column_id: usize = column_id. as_ ( ) ;
658+ Arc :: from ( column_fields[ column_id] . name . as_str ( ) )
659+ } )
660+ . collect :: < FieldNames > ( ) ;
661+
662+ select ( names, root ( ) )
663+ } ;
637664
638665 // file_index column will be filled later when exporting the chunk.
639- let select = select ( names , root ( ) ) ;
666+
640667 let projection = if file_row_number_column_pos. is_some ( ) {
641668 // row_idx will be rearranged to correct position in scan(), prepend
642669 // here
@@ -723,7 +750,20 @@ mod tests {
723750 use std:: sync:: atomic:: AtomicU64 ;
724751 use std:: sync:: atomic:: Ordering :: Relaxed ;
725752
753+ use vortex:: dtype:: DType ;
754+ use vortex:: dtype:: PType ;
755+ use vortex:: expr:: cast;
756+ use vortex:: expr:: merge;
757+ use vortex:: expr:: pack;
758+ use vortex:: expr:: root;
759+ use vortex:: layout:: layouts:: row_idx:: row_idx;
760+
726761 use super :: progress;
762+ use crate :: datasource:: DuckdbField ;
763+ use crate :: datasource:: FILE_INDEX_COLUMN_IDX ;
764+ use crate :: datasource:: FILE_ROW_NUMBER_COLUMN_IDX ;
765+ use crate :: datasource:: extract_projection_expr;
766+ use crate :: duckdb:: LogicalType ;
727767
728768 #[ test]
729769 fn test_table_scan_progress ( ) {
@@ -738,4 +778,65 @@ mod tests {
738778 bytes_total. fetch_add ( 100 , Relaxed ) ;
739779 assert ! ( ( progress( & bytes_read, & bytes_total) - 50. ) . abs( ) < f64 :: EPSILON ) ;
740780 }
781+
782+ #[ test]
783+ fn test_select_star ( ) {
784+ let ids = [ 0 , 1 , 2 ] ;
785+ let fields = [
786+ DuckdbField {
787+ name : "" . to_owned ( ) ,
788+ logical_type : LogicalType :: null ( ) ,
789+ dtype : DType :: Null ,
790+ } ,
791+ DuckdbField {
792+ name : "" . to_owned ( ) ,
793+ logical_type : LogicalType :: null ( ) ,
794+ dtype : DType :: Null ,
795+ } ,
796+ DuckdbField {
797+ name : "" . to_owned ( ) ,
798+ logical_type : LogicalType :: null ( ) ,
799+ dtype : DType :: Null ,
800+ } ,
801+ ] ;
802+
803+ assert_eq ! (
804+ extract_projection_expr( None , & ids, & fields) . projection,
805+ root( )
806+ ) ;
807+
808+ let ids = [ FILE_ROW_NUMBER_COLUMN_IDX , 0 , 1 , FILE_INDEX_COLUMN_IDX , 2 ] ;
809+ let exprs = extract_projection_expr ( None , & ids, & fields) ;
810+ let row_idx = cast ( row_idx ( ) , DType :: Primitive ( PType :: I64 , false . into ( ) ) ) ;
811+ let row_idx_struct = pack ( [ ( "file_row_number" , row_idx) ] , false . into ( ) ) ;
812+ let root_with_virtual_cols = merge ( [ row_idx_struct, root ( ) ] ) ;
813+
814+ assert_eq ! ( exprs. projection, root_with_virtual_cols) ;
815+ assert_eq ! ( exprs. file_index_column_pos, Some ( 3 ) ) ;
816+ assert_eq ! ( exprs. file_row_number_column_pos, Some ( 0 ) ) ;
817+
818+ // projections can't be set in SELECT *.
819+ assert_ne ! (
820+ extract_projection_expr( Some ( & [ 0 , 1 ] ) , & ids, & fields) . projection,
821+ root( )
822+ ) ;
823+
824+ let ids = [ 0 , 1 ] ;
825+ assert_ne ! (
826+ extract_projection_expr( None , & ids, & fields) . projection,
827+ root( )
828+ ) ;
829+
830+ let ids = [ 0 , 2 , 2 ] ;
831+ assert_ne ! (
832+ extract_projection_expr( None , & ids, & fields) . projection,
833+ root( )
834+ ) ;
835+
836+ let ids = [ 2 , 1 , 0 ] ;
837+ assert_ne ! (
838+ extract_projection_expr( None , & ids, & fields) . projection,
839+ root( )
840+ ) ;
841+ }
741842}
0 commit comments