@@ -142,8 +142,10 @@ class TableScanTest : public testing::TestWithParam<int> {
142142
143143 std::shared_ptr<DataFile> MakeDataFile (const std::string& path,
144144 const PartitionValues& partition,
145- int32_t spec_id, int64_t record_count = 1 ) {
146- return std::make_shared<DataFile>(DataFile{
145+ int32_t spec_id, int64_t record_count = 1 ,
146+ std::optional<int32_t > lower_id = std::nullopt ,
147+ std::optional<int32_t > upper_id = std::nullopt ) {
148+ auto file = std::make_shared<DataFile>(DataFile{
147149 .file_path = path,
148150 .file_format = FileFormatType::kParquet ,
149151 .partition = partition,
@@ -152,6 +154,14 @@ class TableScanTest : public testing::TestWithParam<int> {
152154 .sort_order_id = 0 ,
153155 .partition_spec_id = spec_id,
154156 });
157+ // Set lower/upper bounds for field_id=1 ("id" column) if provided
158+ if (lower_id.has_value ()) {
159+ file->lower_bounds [1 ] = Literal::Int (lower_id.value ()).Serialize ().value ();
160+ }
161+ if (upper_id.has_value ()) {
162+ file->upper_bounds [1 ] = Literal::Int (upper_id.value ()).Serialize ().value ();
163+ }
164+ return file;
155165 }
156166
157167 ManifestEntry MakeEntry (ManifestStatus status, int64_t snapshot_id,
@@ -486,37 +496,35 @@ TEST_P(TableScanTest, PlanFilesWithMultipleManifests) {
486496TEST_P (TableScanTest, PlanFilesWithFilter) {
487497 int version = GetParam ();
488498
489- // Test that filters are properly passed to the ManifestGroup
490- auto filter = Expressions::Equal (" id" , Literal::Int (42 ));
499+ constexpr int64_t kSnapshotId = 1000L ;
491500 const auto part_value = PartitionValues ({Literal::Int (0 )});
492501
493- // Create data manifest with files
502+ // Create two data files with non-overlapping id ranges:
503+ // - data1.parquet: id range [1, 50]
504+ // - data2.parquet: id range [51, 100]
494505 std::vector<ManifestEntry> data_entries{
495- MakeEntry (ManifestStatus::kAdded , /* snapshot_id= */ 1000L , /* sequence_number=*/ 1 ,
506+ MakeEntry (ManifestStatus::kAdded , kSnapshotId , /* sequence_number=*/ 1 ,
496507 MakeDataFile (" /path/to/data1.parquet" , part_value,
497- partitioned_spec_->spec_id ())),
498- MakeEntry (ManifestStatus::kAdded , /* snapshot_id=*/ 1000L , /* sequence_number=*/ 1 ,
508+ partitioned_spec_->spec_id (), /* record_count=*/ 1 ,
509+ /* lower_id=*/ 1 , /* upper_id=*/ 50 )),
510+ MakeEntry (ManifestStatus::kAdded , kSnapshotId , /* sequence_number=*/ 1 ,
499511 MakeDataFile (" /path/to/data2.parquet" , part_value,
500- partitioned_spec_->spec_id ()))};
501- auto data_manifest = WriteDataManifest (version, /* snapshot_id=*/ 1000L ,
502- std::move (data_entries), partitioned_spec_);
503-
504- // Write manifest list
505- std::string manifest_list_path = WriteManifestList (
506- version, /* snapshot_id=*/ 1000L , /* sequence_number=*/ 1 , {data_manifest});
512+ partitioned_spec_->spec_id (), /* record_count=*/ 1 ,
513+ /* lower_id=*/ 51 , /* upper_id=*/ 100 ))};
514+ auto data_manifest =
515+ WriteDataManifest (version, kSnapshotId , std::move (data_entries), partitioned_spec_);
516+ std::string manifest_list_path =
517+ WriteManifestList (version, kSnapshotId , /* sequence_number=*/ 1 , {data_manifest});
507518
508- // Create a snapshot that references this manifest list
509519 auto timestamp_ms = TimePointMsFromUnixMs (1609459200000L );
510- auto snapshot_with_manifest =
511- std::make_shared<Snapshot>(Snapshot{.snapshot_id = 1000L ,
512- .parent_snapshot_id = std::nullopt ,
513- .sequence_number = 1L ,
514- .timestamp_ms = timestamp_ms,
515- .manifest_list = manifest_list_path,
516- .summary = {},
517- .schema_id = schema_->schema_id ()});
518-
519- auto metadata_with_manifest = std::make_shared<TableMetadata>(TableMetadata{
520+ auto snapshot = std::make_shared<Snapshot>(Snapshot{.snapshot_id = kSnapshotId ,
521+ .parent_snapshot_id = std::nullopt ,
522+ .sequence_number = 1L ,
523+ .timestamp_ms = timestamp_ms,
524+ .manifest_list = manifest_list_path,
525+ .schema_id = schema_->schema_id ()});
526+
527+ auto metadata = std::make_shared<TableMetadata>(TableMetadata{
520528 .format_version = 2 ,
521529 .table_uuid = " test-table-uuid" ,
522530 .location = " /tmp/table" ,
@@ -528,25 +536,54 @@ TEST_P(TableScanTest, PlanFilesWithFilter) {
528536 .partition_specs = {partitioned_spec_, unpartitioned_spec_},
529537 .default_spec_id = partitioned_spec_->spec_id (),
530538 .last_partition_id = 1000 ,
531- .current_snapshot_id = 1000L ,
532- .snapshots = {snapshot_with_manifest },
539+ .current_snapshot_id = kSnapshotId ,
540+ .snapshots = {snapshot },
533541 .snapshot_log = {SnapshotLogEntry{.timestamp_ms = timestamp_ms,
534- .snapshot_id = 1000L }},
542+ .snapshot_id = kSnapshotId }},
535543 .default_sort_order_id = 0 ,
536- .refs = {
537- {" main" , std::make_shared<SnapshotRef>(SnapshotRef{
538- .snapshot_id = 1000L , .retention = SnapshotRef::Branch{}})}}});
544+ .refs = {{" main" ,
545+ std::make_shared<SnapshotRef>(SnapshotRef{
546+ .snapshot_id = kSnapshotId , .retention = SnapshotRef::Branch{}})}}});
547+
548+ // Test 1: Filter matches only data1.parquet (id=25 is in range [1, 50])
549+ {
550+ ICEBERG_UNWRAP_OR_FAIL (auto builder, TableScanBuilder::Make (metadata, file_io_));
551+ builder->Filter (Expressions::Equal (" id" , Literal::Int (25 )));
552+ ICEBERG_UNWRAP_OR_FAIL (auto scan, builder->Build ());
553+ ICEBERG_UNWRAP_OR_FAIL (auto tasks, scan->PlanFiles ());
554+ ASSERT_EQ (tasks.size (), 1 );
555+ EXPECT_EQ (tasks[0 ]->data_file ()->file_path , " /path/to/data1.parquet" );
556+ }
539557
540- ICEBERG_UNWRAP_OR_FAIL (auto builder,
541- TableScanBuilder::Make (metadata_with_manifest, file_io_));
542- builder->Filter (filter);
543- ICEBERG_UNWRAP_OR_FAIL (auto scan, builder->Build ());
544- EXPECT_EQ (scan->filter (), filter);
558+ // Test 2: Filter matches only data2.parquet (id=75 is in range [51, 100])
559+ {
560+ ICEBERG_UNWRAP_OR_FAIL (auto builder, TableScanBuilder::Make (metadata, file_io_));
561+ builder->Filter (Expressions::Equal (" id" , Literal::Int (75 )));
562+ ICEBERG_UNWRAP_OR_FAIL (auto scan, builder->Build ());
563+ ICEBERG_UNWRAP_OR_FAIL (auto tasks, scan->PlanFiles ());
564+ ASSERT_EQ (tasks.size (), 1 );
565+ EXPECT_EQ (tasks[0 ]->data_file ()->file_path , " /path/to/data2.parquet" );
566+ }
545567
546- ICEBERG_UNWRAP_OR_FAIL (auto tasks, scan->PlanFiles ());
547- ASSERT_EQ (tasks.size (), 2 );
548- EXPECT_THAT (GetPaths (tasks), testing::UnorderedElementsAre (" /path/to/data1.parquet" ,
549- " /path/to/data2.parquet" ));
568+ // Test 3: Filter matches both files (id > 0 covers both ranges)
569+ {
570+ ICEBERG_UNWRAP_OR_FAIL (auto builder, TableScanBuilder::Make (metadata, file_io_));
571+ builder->Filter (Expressions::GreaterThan (" id" , Literal::Int (0 )));
572+ ICEBERG_UNWRAP_OR_FAIL (auto scan, builder->Build ());
573+ ICEBERG_UNWRAP_OR_FAIL (auto tasks, scan->PlanFiles ());
574+ ASSERT_EQ (tasks.size (), 2 );
575+ EXPECT_THAT (GetPaths (tasks), testing::UnorderedElementsAre (" /path/to/data1.parquet" ,
576+ " /path/to/data2.parquet" ));
577+ }
578+
579+ // Test 4: Filter matches no files (id=200 is outside both ranges)
580+ {
581+ ICEBERG_UNWRAP_OR_FAIL (auto builder, TableScanBuilder::Make (metadata, file_io_));
582+ builder->Filter (Expressions::Equal (" id" , Literal::Int (200 )));
583+ ICEBERG_UNWRAP_OR_FAIL (auto scan, builder->Build ());
584+ ICEBERG_UNWRAP_OR_FAIL (auto tasks, scan->PlanFiles ());
585+ EXPECT_TRUE (tasks.empty ());
586+ }
550587}
551588
552589TEST_P (TableScanTest, PlanFilesWithDeleteFiles) {
0 commit comments