@@ -328,7 +328,7 @@ Status ParseLiteral(ArrowArrayView* view_of_partition, int64_t row_idx,
328328}
329329
330330Status ParseDataFile (const std::shared_ptr<StructType>& data_file_schema,
331- ArrowArrayView* view_of_column,
331+ ArrowArrayView* view_of_column, std::optional< int64_t >& first_row_id,
332332 std::vector<ManifestEntry>& manifest_entries) {
333333 if (view_of_column->storage_type != ArrowType::NANOARROW_TYPE_STRUCT) {
334334 return InvalidManifest (" DataFile field should be a struct." );
@@ -432,10 +432,25 @@ Status ParseDataFile(const std::shared_ptr<StructType>& data_file_schema,
432432 PARSE_PRIMITIVE_FIELD (manifest_entries[row_idx].data_file ->sort_order_id ,
433433 view_of_file_field, int32_t );
434434 break ;
435- case 16 :
435+ case 16 : {
436436 PARSE_PRIMITIVE_FIELD (manifest_entries[row_idx].data_file ->first_row_id ,
437437 view_of_file_field, int64_t );
438+ if (first_row_id.has_value ()) {
439+ std::ranges::for_each (manifest_entries, [&first_row_id](ManifestEntry& entry) {
440+ if (entry.status != ManifestStatus::kDeleted &&
441+ !entry.data_file ->first_row_id .has_value ()) {
442+ entry.data_file ->first_row_id = first_row_id.value ();
443+ first_row_id = first_row_id.value () + entry.data_file ->record_count ;
444+ }
445+ });
446+ } else {
447+ // data file's first_row_id is null when the manifest's first_row_id is null
448+ std::ranges::for_each (manifest_entries, [](ManifestEntry& entry) {
449+ entry.data_file ->first_row_id = std::nullopt ;
450+ });
451+ }
438452 break ;
453+ }
439454 case 17 :
440455 PARSE_STRING_FIELD (manifest_entries[row_idx].data_file ->referenced_data_file ,
441456 view_of_file_field);
@@ -455,9 +470,9 @@ Status ParseDataFile(const std::shared_ptr<StructType>& data_file_schema,
455470 return {};
456471}
457472
458- Result<std::vector<ManifestEntry>> ParseManifestEntry (ArrowSchema* schema,
459- ArrowArray* array_in,
460- const Schema& iceberg_schema ) {
473+ Result<std::vector<ManifestEntry>> ParseManifestEntry (
474+ ArrowSchema* schema, ArrowArray* array_in, const Schema& iceberg_schema ,
475+ std::optional< int64_t >& first_row_id ) {
461476 if (schema->n_children != array_in->n_children ) {
462477 return InvalidManifest (" Columns size not match between schema:{} and array:{}" ,
463478 schema->n_children , array_in->n_children );
@@ -512,8 +527,8 @@ Result<std::vector<ManifestEntry>> ParseManifestEntry(ArrowSchema* schema,
512527 case 4 : {
513528 auto data_file_schema =
514529 internal::checked_pointer_cast<StructType>(field.value ()->get ().type ());
515- ICEBERG_RETURN_UNEXPECTED (
516- ParseDataFile (data_file_schema, view_of_column , manifest_entries));
530+ ICEBERG_RETURN_UNEXPECTED (ParseDataFile (data_file_schema, view_of_column,
531+ first_row_id , manifest_entries));
517532 break ;
518533 }
519534 default :
@@ -533,7 +548,7 @@ Result<std::vector<ManifestEntry>> ManifestReaderImpl::Entries() const {
533548 internal::ArrowArrayGuard array_guard (&result.value ());
534549 ICEBERG_ASSIGN_OR_RAISE (
535550 auto parse_result,
536- ParseManifestEntry (&arrow_schema, &result.value (), *schema_));
551+ ParseManifestEntry (&arrow_schema, &result.value (), *schema_, first_row_id_ ));
537552 manifest_entries.insert (manifest_entries.end (),
538553 std::make_move_iterator (parse_result.begin ()),
539554 std::make_move_iterator (parse_result.end ()));
0 commit comments