@@ -666,6 +666,109 @@ TEST_P(TableScanTest, PlanFilesWithDeleteFiles) {
666666 }
667667}
668668
669+ TEST_P (TableScanTest, SchemaWithSelectedColumnsAndFilter) {
670+ auto schema = std::make_shared<Schema>(std::vector<SchemaField>{
671+ SchemaField::MakeRequired (/* field_id=*/ 1 , " id" , int32 ()),
672+ SchemaField::MakeRequired (/* field_id=*/ 2 , " data" , string ()),
673+ SchemaField::MakeRequired (/* field_id=*/ 3 , " value" , int64 ())});
674+ auto timestamp_ms = TimePointMsFromUnixMs (1609459200000L );
675+ auto metadata = std::make_shared<TableMetadata>(TableMetadata{
676+ .format_version = 2 ,
677+ .table_uuid = " test-table-uuid" ,
678+ .location = " /tmp/table" ,
679+ .last_sequence_number = 1L ,
680+ .last_updated_ms = timestamp_ms,
681+ .last_column_id = 3 ,
682+ .schemas = {schema},
683+ .current_schema_id = schema->schema_id (),
684+ .partition_specs = {unpartitioned_spec_},
685+ .default_spec_id = unpartitioned_spec_->spec_id (),
686+ .last_partition_id = 1000 ,
687+ .current_snapshot_id = 1000L ,
688+ .snapshots = {std::make_shared<Snapshot>(Snapshot{
689+ .snapshot_id = 1000L ,
690+ .parent_snapshot_id = std::nullopt ,
691+ .sequence_number = 1L ,
692+ .timestamp_ms = timestamp_ms,
693+ .manifest_list = " /tmp/metadata/snap-1000-1-manifest-list.avro" ,
694+ .schema_id = schema->schema_id (),
695+ })},
696+ .snapshot_log = {SnapshotLogEntry{.timestamp_ms = timestamp_ms,
697+ .snapshot_id = 1000L }},
698+ .default_sort_order_id = 0 ,
699+ .refs = {{" main" , std::make_shared<SnapshotRef>(SnapshotRef{
700+ .snapshot_id = 1000L ,
701+ .retention = SnapshotRef::Branch{},
702+ })}},
703+ });
704+
705+ // Select "data" column, filter on "id" column
706+ {
707+ ICEBERG_UNWRAP_OR_FAIL (auto builder, TableScanBuilder::Make (metadata, file_io_));
708+ builder->Select ({" data" }).Filter (Expressions::Equal (" id" , Literal::Int (42 )));
709+ ICEBERG_UNWRAP_OR_FAIL (auto scan, builder->Build ());
710+ ICEBERG_UNWRAP_OR_FAIL (auto projected_schema, scan->schema ());
711+
712+ ASSERT_EQ (projected_schema->fields ().size (), 2 );
713+
714+ ICEBERG_UNWRAP_OR_FAIL (auto id_field, projected_schema->FindFieldByName (" id" ));
715+ EXPECT_TRUE (id_field.has_value ());
716+ EXPECT_EQ (id_field->get ().field_id (), 1 );
717+
718+ ICEBERG_UNWRAP_OR_FAIL (auto data_field, projected_schema->FindFieldByName (" data" ));
719+ EXPECT_TRUE (data_field.has_value ());
720+ EXPECT_EQ (data_field->get ().field_id (), 2 );
721+ }
722+
723+ // Select "id" and "value", filter on "data"
724+ {
725+ ICEBERG_UNWRAP_OR_FAIL (auto builder, TableScanBuilder::Make (metadata, file_io_));
726+ builder->Select ({" id" , " value" })
727+ .Filter (Expressions::Equal (" data" , Literal::String (" test" )));
728+ ICEBERG_UNWRAP_OR_FAIL (auto scan, builder->Build ());
729+ ICEBERG_UNWRAP_OR_FAIL (auto projected_schema, scan->schema ());
730+
731+ ASSERT_EQ (projected_schema->fields ().size (), 3 );
732+
733+ ICEBERG_UNWRAP_OR_FAIL (auto id_field, projected_schema->FindFieldByName (" id" ));
734+ EXPECT_TRUE (id_field.has_value ());
735+
736+ ICEBERG_UNWRAP_OR_FAIL (auto data_field, projected_schema->FindFieldByName (" data" ));
737+ EXPECT_TRUE (data_field.has_value ());
738+
739+ ICEBERG_UNWRAP_OR_FAIL (auto value_field, projected_schema->FindFieldByName (" value" ));
740+ EXPECT_TRUE (value_field.has_value ());
741+ }
742+
743+ // Select "id", filter on "id" - should only have "id" once
744+ {
745+ ICEBERG_UNWRAP_OR_FAIL (auto builder, TableScanBuilder::Make (metadata, file_io_));
746+ builder->Select ({" id" }).Filter (Expressions::Equal (" id" , Literal::Int (42 )));
747+ ICEBERG_UNWRAP_OR_FAIL (auto scan, builder->Build ());
748+ ICEBERG_UNWRAP_OR_FAIL (auto projected_schema, scan->schema ());
749+
750+ ASSERT_EQ (projected_schema->fields ().size (), 1 );
751+
752+ ICEBERG_UNWRAP_OR_FAIL (auto id_field, projected_schema->FindFieldByName (" id" ));
753+ EXPECT_TRUE (id_field.has_value ());
754+ EXPECT_EQ (id_field->get ().field_id (), 1 );
755+ }
756+
757+ // Select columns without filter
758+ {
759+ ICEBERG_UNWRAP_OR_FAIL (auto builder, TableScanBuilder::Make (metadata, file_io_));
760+ builder->Select ({" data" });
761+ ICEBERG_UNWRAP_OR_FAIL (auto scan, builder->Build ());
762+ ICEBERG_UNWRAP_OR_FAIL (auto projected_schema, scan->schema ());
763+
764+ ASSERT_EQ (projected_schema->fields ().size (), 1 );
765+
766+ ICEBERG_UNWRAP_OR_FAIL (auto data_field, projected_schema->FindFieldByName (" data" ));
767+ EXPECT_TRUE (data_field.has_value ());
768+ EXPECT_EQ (data_field->get ().field_id (), 2 );
769+ }
770+ }
771+
669772INSTANTIATE_TEST_SUITE_P (TableScanVersions, TableScanTest, testing::Values(1 , 2 , 3 ));
670773
671774} // namespace iceberg
0 commit comments