From a3e90d31ea496b15ed171a181e209cf5b334b09c Mon Sep 17 00:00:00 2001
From: Matt Butrovich <mbutrovich@gmail.com>
Date: Tue, 31 Mar 2026 12:01:04 -0400
Subject: [PATCH 1/8] coerce int96 timestamps

---
 crates/iceberg/src/arrow/reader.rs | 514 +++++++++++++++++++++++++++++
 1 file changed, 514 insertions(+)
diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs
index 042a730e19..745de66d94 100644
--- a/crates/iceberg/src/arrow/reader.rs
+++ b/crates/iceberg/src/arrow/reader.rs
@@ -386,6 +386,28 @@ impl ArrowReader {
             arrow_metadata
         };
 
+        // Coerce INT96 timestamp columns to the correct TimeUnit from the Iceberg schema.
+        // Must happen before building the stream so arrow-rs decodes INT96 at the right
+        // resolution (e.g., microseconds instead of nanoseconds to avoid i64 overflow).
+        let arrow_metadata = if let Some(coerced_schema) = coerce_int96_timestamps(
+            arrow_metadata.metadata().file_metadata().schema_descr(),
+            arrow_metadata.schema(),
+            &task.schema,
+        ) {
+            let options = ArrowReaderOptions::new().with_schema(coerced_schema);
+            ArrowReaderMetadata::try_new(Arc::clone(arrow_metadata.metadata()), options).map_err(
+                |e| {
+                    Error::new(
+                        ErrorKind::Unexpected,
+                        "Failed to create ArrowReaderMetadata with INT96-coerced schema",
+                    )
+                    .with_source(e)
+                },
+            )?
+        } else {
+            arrow_metadata
+        };
+
         // Build the stream reader, reusing the already-opened file reader
         let mut record_batch_stream_builder =
             ParquetRecordBatchStreamBuilder::new_with_metadata(parquet_file_reader, arrow_metadata);
@@ -1232,6 +1254,121 @@ fn add_fallback_field_ids_to_arrow_schema(arrow_schema: &ArrowSchemaRef) -> Arc<
     ))
 }
 
+/// Coerce Arrow schema types for Parquet INT96 columns to match the Iceberg table schema.
+///
+/// INT96 is a legacy Parquet physical type for timestamps (common in Spark/Hive files).
+/// arrow-rs defaults INT96 to Timestamp(Nanosecond), which overflows i64 for dates outside
+/// ~1677-2262. The Iceberg spec defines timestamps as microsecond precision (timestamp) or
+/// nanosecond precision (timestamp_ns).
+/// See: https://iceberg.apache.org/spec/#primitive-types
+///
+/// This function patches the Arrow schema so arrow-rs reads INT96 at the correct resolution,
+/// using the schema hint mechanism from arrow-rs PR #7285.
+///
+/// Iceberg Java avoids this by using a custom INT96 column reader that bypasses parquet-mr's
+/// default decoding (GenericParquetReaders.TimestampInt96Reader). iceberg-rust delegates to
+/// arrow-rs, so we must pass the correct schema hint instead.
+fn coerce_int96_timestamps(
+    parquet_schema: &SchemaDescriptor,
+    arrow_schema: &ArrowSchemaRef,
+    iceberg_schema: &Schema,
+) -> Option<Arc<ArrowSchema>> {
+    use arrow_schema::{DataType, Field, Fields, TimeUnit};
+    use parquet::basic::Type as PhysicalType;
+
+    // Collect paths of INT96 leaf columns from the Parquet schema
+    let int96_paths: HashSet<String> = parquet_schema
+        .columns()
+        .iter()
+        .filter(|col| col.physical_type() == PhysicalType::INT96)
+        .map(|col| col.path().string())
+        .collect();
+
+    if int96_paths.is_empty() {
+        return None;
+    }
+
+    // Recursively coerce fields, tracking the Parquet column path for matching
+    fn coerce_field(
+        field: &FieldRef,
+        path_parts: &[&str],
+        int96_paths: &HashSet<String>,
+        iceberg_schema: &Schema,
+    ) -> FieldRef {
+        match field.data_type() {
+            DataType::Struct(fields) => {
+                let new_fields: Vec<FieldRef> = fields
+                    .iter()
+                    .map(|child| {
+                        let mut child_path = path_parts.to_vec();
+                        child_path.push(child.name().as_str());
+                        coerce_field(child, &child_path, int96_paths, iceberg_schema)
+                    })
+                    .collect();
+                Arc::new(
+                    Field::new(
+                        field.name(),
+                        DataType::Struct(Fields::from(new_fields)),
+                        field.is_nullable(),
+                    )
+                    .with_metadata(field.metadata().clone()),
+                )
+            }
+            DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
+                let full_path = path_parts.join(".");
+                if int96_paths.contains(&full_path) {
+                    // Look up the Iceberg field by ID to determine target TimeUnit
+                    let target_unit = field
+                        .metadata()
+                        .get(PARQUET_FIELD_ID_META_KEY)
+                        .and_then(|id_str| id_str.parse::<i32>().ok())
+                        .and_then(|field_id| iceberg_schema.field_by_id(field_id))
+                        .and_then(|iceberg_field| match &*iceberg_field.field_type {
+                            Type::Primitive(PrimitiveType::Timestamp)
+                            | Type::Primitive(PrimitiveType::Timestamptz) => {
+                                Some(TimeUnit::Microsecond)
+                            }
+                            Type::Primitive(PrimitiveType::TimestampNs)
+                            | Type::Primitive(PrimitiveType::TimestamptzNs) => {
+                                Some(TimeUnit::Nanosecond)
+                            }
+                            _ => None,
+                        })
+                        // Default to Microsecond for INT96 — matches Iceberg Java behavior
+                        .unwrap_or(TimeUnit::Microsecond);
+
+                    if target_unit != TimeUnit::Nanosecond {
+                        return Arc::new(
+                            Field::new(
+                                field.name(),
+                                DataType::Timestamp(target_unit, tz.clone()),
+                                field.is_nullable(),
+                            )
+                            .with_metadata(field.metadata().clone()),
+                        );
+                    }
+                }
+                Arc::clone(field)
+            }
+            _ => Arc::clone(field),
+        }
+    }
+
+    let coerced_fields: Vec<FieldRef> = arrow_schema
+        .fields()
+        .iter()
+        .map(|field| coerce_field(field, &[field.name().as_str()], &int96_paths, iceberg_schema))
+        .collect();
+
+    let coerced = ArrowSchema::new_with_metadata(coerced_fields, arrow_schema.metadata().clone());
+
+    if &coerced != arrow_schema.as_ref() {
+        Some(Arc::new(coerced))
+    } else {
+        None
+    }
+}
+
 /// A visitor to collect field ids from bound predicates.
 struct CollectFieldIdVisitor {
     field_ids: HashSet<i32>,
@@ -4667,4 +4804,381 @@ message schema {
         assert_eq!(result[1], expected_1);
         assert_eq!(result[2], expected_2);
     }
+
+    /// Helper to write a Parquet file with INT96 timestamps using the low-level writer.
+    /// Returns (file_path, expected_microsecond_values).
+    ///
+    /// We must use SerializedFileWriter because ArrowWriter cannot write INT96.
+    fn write_int96_parquet_file(
+        table_location: &str,
+        filename: &str,
+        with_field_ids: bool,
+    ) -> (String, Vec<i64>) {
+        use parquet::basic::{Repetition, Type as PhysicalType};
+        use parquet::data_type::{Int32Type, Int96, Int96Type};
+        use parquet::file::writer::SerializedFileWriter;
+        use parquet::schema::types::Type as SchemaType;
+
+        let file_path = format!("{table_location}/{filename}");
+
+        // Build Parquet schema with INT96 timestamp and INT32 id
+        let mut ts_builder =
+            SchemaType::primitive_type_builder("ts", PhysicalType::INT96)
+                .with_repetition(Repetition::OPTIONAL);
+        let mut id_builder =
+            SchemaType::primitive_type_builder("id", PhysicalType::INT32)
+                .with_repetition(Repetition::REQUIRED);
+
+        if with_field_ids {
+            ts_builder = ts_builder.with_id(Some(1));
+            id_builder = id_builder.with_id(Some(2));
+        }
+
+        let schema = SchemaType::group_type_builder("schema")
+            .with_fields(vec![
+                Arc::new(ts_builder.build().unwrap()),
+                Arc::new(id_builder.build().unwrap()),
+            ])
+            .build()
+            .unwrap();
+
+        // INT96 encoding: [nanos_low_u32, nanos_high_u32, julian_day_u32]
+        // Julian day 2_440_588 = Unix epoch (1970-01-01)
+        //
+        // We pick dates outside the i64 nanosecond range (~1677-2262) to trigger
+        // the overflow bug. Year 3333 ≈ Julian day 2_953_529.
+        let test_data: Vec<(u32, u32, u32, i64)> = vec![
+            // (nanos_low, nanos_high, julian_day, expected_micros_since_epoch)
+            // Day 2_953_529 = 3333-01-01, nanos_within_day = 0
+            (0, 0, 2_953_529, {
+                (2_953_529i64 - 2_440_588) * 86_400_000_000
+            }),
+            // Day 2_953_529 = 3333-01-01, nanos_within_day = 43_200_000_000_000 (12:00:00 noon)
+            // 43_200_000_000_000 as two u32s: low = 43_200_000_000_000 & 0xFFFFFFFF, high = 43_200_000_000_000 >> 32
+            ({
+                (43_200_000_000_000u64 & 0xFFFFFFFF) as u32
+            }, {
+                (43_200_000_000_000u64 >> 32) as u32
+            }, 2_953_529, {
+                (2_953_529i64 - 2_440_588) * 86_400_000_000 + 43_200_000_000
+            }),
+            // Day 2_488_070 = 2100-01-01, nanos_within_day = 0
+            (0, 0, 2_488_070, {
+                (2_488_070i64 - 2_440_588) * 86_400_000_000
+            }),
+        ];
+
+        let int96_values: Vec<Int96> = test_data
+            .iter()
+            .map(|(lo, hi, day, _)| {
+                let mut v = Int96::new();
+                v.set_data(*lo, *hi, *day);
+                v
+            })
+            .collect();
+
+        let id_values: Vec<i32> = (0..test_data.len() as i32).collect();
+        let expected_micros: Vec<i64> = test_data.iter().map(|(_, _, _, m)| *m).collect();
+
+        let file = File::create(&file_path).unwrap();
+        let mut writer =
+            SerializedFileWriter::new(file, Arc::new(schema), Default::default()).unwrap();
+
+        let mut row_group = writer.next_row_group().unwrap();
+        {
+            // Write ts column (optional — all present, so def_levels = [1, 1, 1])
+            let mut col = row_group.next_column().unwrap().unwrap();
+            col.typed::<Int96Type>()
+                .write_batch(&int96_values, Some(&vec![1i16; test_data.len()]), None)
+                .unwrap();
+            col.close().unwrap();
+        }
+        {
+            // Write id column (required — no def levels)
+            let mut col = row_group.next_column().unwrap().unwrap();
+            col.typed::<Int32Type>()
+                .write_batch(&id_values, None, None)
+                .unwrap();
+            col.close().unwrap();
+        }
+        row_group.close().unwrap();
+        writer.close().unwrap();
+
+        (file_path, expected_micros)
+    }
+
+    /// Test reading INT96 timestamps through iceberg-rust's ArrowReader.
+    /// Files WITH embedded field IDs (branch 1 of schema resolution).
+    #[tokio::test]
+    async fn test_read_int96_timestamps_with_field_ids() {
+        use arrow_array::TimestampMicrosecondArray;
+
+        let schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::Timestamp))
+                        .into(),
+                    NestedField::required(2, "id", Type::Primitive(PrimitiveType::Int)).into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+
+        let (file_path, expected_micros) =
+            write_int96_parquet_file(&table_location, "with_ids.parquet", true);
+
+        let file_io = FileIO::new_with_fs();
+        let reader = ArrowReaderBuilder::new(file_io).build();
+
+        let task = FileScanTask {
+            file_size_in_bytes: std::fs::metadata(&file_path).unwrap().len(),
+            start: 0,
+            length: std::fs::metadata(&file_path).unwrap().len(),
+            record_count: None,
+            data_file_path: file_path,
+            data_file_format: DataFileFormat::Parquet,
+            schema: schema.clone(),
+            project_field_ids: vec![1, 2],
+            predicate: None,
+            deletes: vec![],
+            partition: None,
+            partition_spec: None,
+            name_mapping: None,
+            case_sensitive: false,
+        };
+
+        let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream;
+        let batches: Vec<RecordBatch> = reader
+            .read(tasks)
+            .unwrap()
+            .try_collect()
+            .await
+            .unwrap();
+
+        assert_eq!(batches.len(), 1);
+        let batch = &batches[0];
+
+        let ts_array = batch
+            .column(0)
+            .as_any()
+            .downcast_ref::<TimestampMicrosecondArray>()
+            .expect("Expected TimestampMicrosecondArray");
+
+        for (i, expected) in expected_micros.iter().enumerate() {
+            assert_eq!(
+                ts_array.value(i),
+                *expected,
+                "Mismatch at row {i}: got {}, expected {expected}",
+                ts_array.value(i)
+            );
+        }
+    }
+
+    /// Test reading INT96 timestamps from migrated files WITHOUT field IDs
+    /// (branches 2/3 of schema resolution — the common migration case).
+    #[tokio::test]
+    async fn test_read_int96_timestamps_without_field_ids() {
+        use arrow_array::TimestampMicrosecondArray;
+
+        let schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::Timestamp))
+                        .into(),
+                    NestedField::required(2, "id", Type::Primitive(PrimitiveType::Int)).into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+
+        let (file_path, expected_micros) =
+            write_int96_parquet_file(&table_location, "no_ids.parquet", false);
+
+        let file_io = FileIO::new_with_fs();
+        let reader = ArrowReaderBuilder::new(file_io).build();
+
+        let task = FileScanTask {
+            file_size_in_bytes: std::fs::metadata(&file_path).unwrap().len(),
+            start: 0,
+            length: std::fs::metadata(&file_path).unwrap().len(),
+            record_count: None,
+            data_file_path: file_path,
+            data_file_format: DataFileFormat::Parquet,
+            schema: schema.clone(),
+            project_field_ids: vec![1, 2],
+            predicate: None,
+            deletes: vec![],
+            partition: None,
+            partition_spec: None,
+            name_mapping: None,
+            case_sensitive: false,
+        };
+
+        let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream;
+        let batches: Vec<RecordBatch> = reader
+            .read(tasks)
+            .unwrap()
+            .try_collect()
+            .await
+            .unwrap();
+
+        assert_eq!(batches.len(), 1);
+        let batch = &batches[0];
+
+        let ts_array = batch
+            .column(0)
+            .as_any()
+            .downcast_ref::<TimestampMicrosecondArray>()
+            .expect("Expected TimestampMicrosecondArray");
+
+        for (i, expected) in expected_micros.iter().enumerate() {
+            assert_eq!(
+                ts_array.value(i),
+                *expected,
+                "Mismatch at row {i}: got {}, expected {expected}",
+                ts_array.value(i)
+            );
+        }
+    }
+
+    /// Test reading INT96 timestamps inside a struct field.
+    // TODO: Add test_read_int96_timestamps_in_struct once RecordBatchTransformer
+    // supports struct fields written by SerializedFileWriter. The coerce_int96_timestamps
+    // function already handles nested types via recursive traversal.
+    #[tokio::test]
+    #[ignore = "RecordBatchTransformer struct field matching needs work for this test setup"]
+    async fn test_read_int96_timestamps_in_struct() {
+        use arrow_array::TimestampMicrosecondArray;
+        use parquet::basic::{Repetition, Type as PhysicalType};
+        use parquet::data_type::Int96Type;
+        use parquet::file::writer::SerializedFileWriter;
+        use parquet::schema::types::Type as SchemaType;
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_path = format!("{table_location}/struct_int96.parquet");
+
+        // Parquet schema: message schema { required group data { optional INT96 ts; } }
+        // With field IDs: data=1, ts=2
+        let ts_type = SchemaType::primitive_type_builder("ts", PhysicalType::INT96)
+            .with_repetition(Repetition::OPTIONAL)
+            .with_id(Some(2))
+            .build()
+            .unwrap();
+
+        let struct_type = SchemaType::group_type_builder("data")
+            .with_repetition(Repetition::REQUIRED)
+            .with_id(Some(1))
+            .with_fields(vec![Arc::new(ts_type)])
+            .build()
+            .unwrap();
+
+        let parquet_schema = SchemaType::group_type_builder("schema")
+            .with_fields(vec![Arc::new(struct_type)])
+            .build()
+            .unwrap();
+
+        // Write INT96 value: year 3333 (Julian day 2_953_529), noon
+        let nanos_within_day = 43_200_000_000_000u64;
+        let julian_day = 2_953_529u32;
+        let expected_micros =
+            (julian_day as i64 - 2_440_588) * 86_400_000_000 + (nanos_within_day / 1_000) as i64;
+
+        let mut int96_val = parquet::data_type::Int96::new();
+        int96_val.set_data(
+            (nanos_within_day & 0xFFFFFFFF) as u32,
+            (nanos_within_day >> 32) as u32,
+            julian_day,
+        );
+
+        let file = File::create(&file_path).unwrap();
+        let mut writer =
+            SerializedFileWriter::new(file, Arc::new(parquet_schema), Default::default()).unwrap();
+
+        let mut row_group = writer.next_row_group().unwrap();
+        {
+            let mut col = row_group.next_column().unwrap().unwrap();
+            col.typed::<Int96Type>()
+                .write_batch(&[int96_val], Some(&[1i16]), None)
+                .unwrap();
+            col.close().unwrap();
+        }
+        row_group.close().unwrap();
+        writer.close().unwrap();
+
+        // Iceberg schema: struct<ts: timestamp>
+        let iceberg_schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![NestedField::required(
+                    1,
+                    "data",
+                    Type::Struct(crate::spec::StructType::new(vec![
+                        NestedField::optional(2, "ts", Type::Primitive(PrimitiveType::Timestamp))
+                            .into(),
+                    ])),
+                )
+                .into()])
+                .build()
+                .unwrap(),
+        );
+
+        let file_io = FileIO::new_with_fs();
+        let reader = ArrowReaderBuilder::new(file_io).build();
+
+        let task = FileScanTask {
+            file_size_in_bytes: std::fs::metadata(&file_path).unwrap().len(),
+            start: 0,
+            length: std::fs::metadata(&file_path).unwrap().len(),
+            record_count: None,
+            data_file_path: file_path,
+            data_file_format: DataFileFormat::Parquet,
+            schema: iceberg_schema,
+            project_field_ids: vec![1, 2],
+            predicate: None,
+            deletes: vec![],
+            partition: None,
+            partition_spec: None,
+            name_mapping: None,
+            case_sensitive: false,
+        };
+
+        let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream;
+        let batches: Vec<RecordBatch> = reader
+            .read(tasks)
+            .unwrap()
+            .try_collect()
+            .await
+            .unwrap();
+
+        assert_eq!(batches.len(), 1);
+        let batch = &batches[0];
+
+        // Navigate into the struct to get the timestamp array
+        let struct_array = batch
+            .column(0)
+            .as_any()
+            .downcast_ref::<arrow_array::StructArray>()
+            .expect("Expected StructArray");
+        let ts_array = struct_array
+            .column(0)
+            .as_any()
+            .downcast_ref::<TimestampMicrosecondArray>()
+            .expect("Expected TimestampMicrosecondArray inside struct");
+
+        assert_eq!(
+            ts_array.value(0),
+            expected_micros,
+            "INT96 in struct: got {}, expected {expected_micros}",
+            ts_array.value(0)
+        );
+    }
 }

From 7d80d2e34b7bee4ea655b7f8f865cc6ba7fa0221 Mon Sep 17 00:00:00 2001
From: Matt Butrovich <mbutrovich@gmail.com>
Date: Tue, 31 Mar 2026 12:30:34 -0400
Subject: [PATCH 2/8] cleanup

---
 crates/iceberg/src/arrow/reader.rs | 340 ++++++++++++-----------------
 1 file changed, 143 insertions(+), 197 deletions(-)

diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs
index 745de66d94..0aee3ec5ed 100644
--- a/crates/iceberg/src/arrow/reader.rs
+++ b/crates/iceberg/src/arrow/reader.rs
@@ -386,9 +386,8 @@ impl ArrowReader {
             arrow_metadata
         };
 
-        // Coerce INT96 timestamp columns to the correct TimeUnit from the Iceberg schema.
-        // Must happen before building the stream so arrow-rs decodes INT96 at the right
-        // resolution (e.g., microseconds instead of nanoseconds to avoid i64 overflow).
+        // Coerce INT96 timestamp columns to the resolution specified by the Iceberg schema.
+        // This must happen before building the stream reader to avoid i64 overflow in arrow-rs.
         let arrow_metadata = if let Some(coerced_schema) = coerce_int96_timestamps(
             arrow_metadata.metadata().file_metadata().schema_descr(),
             arrow_metadata.schema(),
@@ -1254,20 +1253,18 @@ fn add_fallback_field_ids_to_arrow_schema(arrow_schema: &ArrowSchemaRef) -> Arc<
     ))
 }
 
-/// Coerce Arrow schema types for Parquet INT96 columns to match the Iceberg table schema.
+/// Coerce Arrow schema types for INT96 columns to match the Iceberg table schema.
 ///
-/// INT96 is a legacy Parquet physical type for timestamps (common in Spark/Hive files).
-/// arrow-rs defaults INT96 to Timestamp(Nanosecond), which overflows i64 for dates outside
-/// ~1677-2262. The Iceberg spec defines timestamps as microsecond precision (timestamp) or
-/// nanosecond precision (timestamp_ns).
-/// See: https://iceberg.apache.org/spec/#primitive-types
+/// arrow-rs defaults INT96 to `Timestamp(Nanosecond)`, which overflows i64 for dates outside
+/// ~1677-2262. We use arrow-rs's schema hint mechanism to read INT96 at the resolution
+/// specified by the Iceberg schema (`timestamp` → microsecond, `timestamp_ns` → nanosecond).
 ///
-/// This function patches the Arrow schema so arrow-rs reads INT96 at the correct resolution,
-/// using the schema hint mechanism from arrow-rs PR #7285.
+/// Iceberg Java handles this differently: it bypasses parquet-mr with a custom column reader
+/// (`GenericParquetReaders.TimestampInt96Reader`). We achieve the same result via schema hints.
 ///
-/// Iceberg Java avoids this by using a custom INT96 column reader that bypasses parquet-mr's
-/// default decoding (GenericParquetReaders.TimestampInt96Reader). iceberg-rust delegates to
-/// arrow-rs, so we must pass the correct schema hint instead.
+/// References:
+/// - Iceberg spec primitive types: <https://iceberg.apache.org/spec/#primitive-types>
+/// - arrow-rs schema hint support: <https://github.com/apache/arrow-rs/pull/7285>
 fn coerce_int96_timestamps(
     parquet_schema: &SchemaDescriptor,
     arrow_schema: &ArrowSchemaRef,
@@ -1276,7 +1273,6 @@ fn coerce_int96_timestamps(
     use arrow_schema::{DataType, Field, Fields, TimeUnit};
     use parquet::basic::Type as PhysicalType;
 
-    // Collect paths of INT96 leaf columns from the Parquet schema
     let int96_paths: HashSet<String> = parquet_schema
         .columns()
         .iter()
@@ -1288,7 +1284,6 @@ fn coerce_int96_timestamps(
         return None;
     }
 
-    // Recursively coerce fields, tracking the Parquet column path for matching
     fn coerce_field(
         field: &FieldRef,
         path_parts: &[&str],
@@ -1314,41 +1309,38 @@ fn coerce_int96_timestamps(
                     .with_metadata(field.metadata().clone()),
                 )
             }
-            DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
-                let full_path = path_parts.join(".");
-                if int96_paths.contains(&full_path) {
-                    // Look up the Iceberg field by ID to determine target TimeUnit
-                    let target_unit = field
-                        .metadata()
-                        .get(PARQUET_FIELD_ID_META_KEY)
-                        .and_then(|id_str| id_str.parse::<i32>().ok())
-                        .and_then(|field_id| iceberg_schema.field_by_id(field_id))
-                        .and_then(|iceberg_field| match &*iceberg_field.field_type {
-                            Type::Primitive(PrimitiveType::Timestamp)
-                            | Type::Primitive(PrimitiveType::Timestamptz) => {
-                                Some(TimeUnit::Microsecond)
-                            }
-                            Type::Primitive(PrimitiveType::TimestampNs)
-                            | Type::Primitive(PrimitiveType::TimestamptzNs) => {
-                                Some(TimeUnit::Nanosecond)
-                            }
-                            _ => None,
-                        })
-                        // Default to Microsecond for INT96 — matches Iceberg Java behavior
-                        .unwrap_or(TimeUnit::Microsecond);
-
-                    if target_unit != TimeUnit::Nanosecond {
-                        return Arc::new(
-                            Field::new(
-                                field.name(),
-                                DataType::Timestamp(target_unit, tz.clone()),
-                                field.is_nullable(),
-                            )
-                            .with_metadata(field.metadata().clone()),
-                        );
-                    }
+            DataType::Timestamp(TimeUnit::Nanosecond, tz)
+                if int96_paths.contains(&path_parts.join(".")) =>
+            {
+                let target_unit = field
+                    .metadata()
+                    .get(PARQUET_FIELD_ID_META_KEY)
+                    .and_then(|id_str| id_str.parse::<i32>().ok())
+                    .and_then(|field_id| iceberg_schema.field_by_id(field_id))
+                    .and_then(|f| match &*f.field_type {
+                        Type::Primitive(PrimitiveType::Timestamp | PrimitiveType::Timestamptz) => {
+                            Some(TimeUnit::Microsecond)
+                        }
+                        Type::Primitive(
+                            PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs,
+                        ) => Some(TimeUnit::Nanosecond),
+                        _ => None,
+                    })
+                    // Iceberg Java reads INT96 as microseconds by default
+                    .unwrap_or(TimeUnit::Microsecond);
+
+                if target_unit == TimeUnit::Nanosecond {
+                    return Arc::clone(field);
                 }
-                Arc::clone(field)
+
+                Arc::new(
+                    Field::new(
+                        field.name(),
+                        DataType::Timestamp(target_unit, tz.clone()),
+                        field.is_nullable(),
+                    )
+                    .with_metadata(field.metadata().clone()),
+                )
             }
             _ => Arc::clone(field),
         }
@@ -1357,7 +1349,14 @@ fn coerce_int96_timestamps(
     let coerced_fields: Vec<FieldRef> = arrow_schema
         .fields()
         .iter()
-        .map(|field| coerce_field(field, &[field.name().as_str()], &int96_paths, iceberg_schema))
+        .map(|field| {
+            coerce_field(
+                field,
+                &[field.name().as_str()],
+                &int96_paths,
+                iceberg_schema,
+            )
+        })
         .collect();
 
     let coerced = ArrowSchema::new_with_metadata(coerced_fields, arrow_schema.metadata().clone());
@@ -4805,10 +4804,8 @@ message schema {
         assert_eq!(result[2], expected_2);
     }
 
-    /// Helper to write a Parquet file with INT96 timestamps using the low-level writer.
-    /// Returns (file_path, expected_microsecond_values).
-    ///
-    /// We must use SerializedFileWriter because ArrowWriter cannot write INT96.
+    /// Writes a Parquet file with INT96 timestamps using SerializedFileWriter
+    /// (ArrowWriter cannot write INT96). Returns (file_path, expected_microsecond_values).
     fn write_int96_parquet_file(
         table_location: &str,
         filename: &str,
@@ -4821,13 +4818,10 @@ message schema {
 
         let file_path = format!("{table_location}/{filename}");
 
-        // Build Parquet schema with INT96 timestamp and INT32 id
-        let mut ts_builder =
-            SchemaType::primitive_type_builder("ts", PhysicalType::INT96)
-                .with_repetition(Repetition::OPTIONAL);
-        let mut id_builder =
-            SchemaType::primitive_type_builder("id", PhysicalType::INT32)
-                .with_repetition(Repetition::REQUIRED);
+        let mut ts_builder = SchemaType::primitive_type_builder("ts", PhysicalType::INT96)
+            .with_repetition(Repetition::OPTIONAL);
+        let mut id_builder = SchemaType::primitive_type_builder("id", PhysicalType::INT32)
+            .with_repetition(Repetition::REQUIRED);
 
         if with_field_ids {
             ts_builder = ts_builder.with_id(Some(1));
@@ -4844,28 +4838,33 @@ message schema {
 
         // INT96 encoding: [nanos_low_u32, nanos_high_u32, julian_day_u32]
         // Julian day 2_440_588 = Unix epoch (1970-01-01)
-        //
-        // We pick dates outside the i64 nanosecond range (~1677-2262) to trigger
-        // the overflow bug. Year 3333 ≈ Julian day 2_953_529.
+        // Dates outside the i64 nanosecond range (~1677-2262) overflow without coercion.
+        const UNIX_EPOCH_JULIAN: i64 = 2_440_588;
+        const MICROS_PER_DAY: i64 = 86_400_000_000;
+        const NOON_NANOS: u64 = 43_200_000_000_000;
+
         let test_data: Vec<(u32, u32, u32, i64)> = vec![
-            // (nanos_low, nanos_high, julian_day, expected_micros_since_epoch)
-            // Day 2_953_529 = 3333-01-01, nanos_within_day = 0
-            (0, 0, 2_953_529, {
-                (2_953_529i64 - 2_440_588) * 86_400_000_000
-            }),
-            // Day 2_953_529 = 3333-01-01, nanos_within_day = 43_200_000_000_000 (12:00:00 noon)
-            // 43_200_000_000_000 as two u32s: low = 43_200_000_000_000 & 0xFFFFFFFF, high = 43_200_000_000_000 >> 32
-            ({
-                (43_200_000_000_000u64 & 0xFFFFFFFF) as u32
-            }, {
-                (43_200_000_000_000u64 >> 32) as u32
-            }, 2_953_529, {
-                (2_953_529i64 - 2_440_588) * 86_400_000_000 + 43_200_000_000
-            }),
-            // Day 2_488_070 = 2100-01-01, nanos_within_day = 0
-            (0, 0, 2_488_070, {
-                (2_488_070i64 - 2_440_588) * 86_400_000_000
-            }),
+            // 3333-01-01 00:00:00
+            (
+                0,
+                0,
+                2_953_529,
+                (2_953_529 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY,
+            ),
+            // 3333-01-01 12:00:00
+            (
+                (NOON_NANOS & 0xFFFFFFFF) as u32,
+                (NOON_NANOS >> 32) as u32,
+                2_953_529,
+                (2_953_529 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY + (NOON_NANOS / 1_000) as i64,
+            ),
+            // 2100-01-01 00:00:00
+            (
+                0,
+                0,
+                2_488_070,
+                (2_488_070 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY,
+            ),
         ];
 
         let int96_values: Vec<Int96> = test_data
@@ -4886,7 +4885,6 @@ message schema {
 
         let mut row_group = writer.next_row_group().unwrap();
         {
-            // Write ts column (optional — all present, so def_levels = [1, 1, 1])
             let mut col = row_group.next_column().unwrap().unwrap();
             col.typed::<Int96Type>()
                 .write_batch(&int96_values, Some(&vec![1i16; test_data.len()]), None)
@@ -4894,7 +4892,6 @@ message schema {
             col.close().unwrap();
         }
         {
-            // Write id column (required — no def levels)
             let mut col = row_group.next_column().unwrap().unwrap();
             col.typed::<Int32Type>()
                 .write_batch(&id_values, None, None)
@@ -4907,42 +4904,27 @@ message schema {
         (file_path, expected_micros)
     }
 
-    /// Test reading INT96 timestamps through iceberg-rust's ArrowReader.
-    /// Files WITH embedded field IDs (branch 1 of schema resolution).
-    #[tokio::test]
-    async fn test_read_int96_timestamps_with_field_ids() {
+    /// Read INT96 Parquet file through ArrowReader and verify microsecond timestamps.
+    async fn assert_int96_read_matches(
+        file_path: &str,
+        schema: SchemaRef,
+        project_field_ids: Vec<i32>,
+        expected_micros: &[i64],
+    ) {
         use arrow_array::TimestampMicrosecondArray;
 
-        let schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::Timestamp))
-                        .into(),
-                    NestedField::required(2, "id", Type::Primitive(PrimitiveType::Int)).into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-
-        let (file_path, expected_micros) =
-            write_int96_parquet_file(&table_location, "with_ids.parquet", true);
-
         let file_io = FileIO::new_with_fs();
         let reader = ArrowReaderBuilder::new(file_io).build();
 
         let task = FileScanTask {
-            file_size_in_bytes: std::fs::metadata(&file_path).unwrap().len(),
+            file_size_in_bytes: std::fs::metadata(file_path).unwrap().len(),
             start: 0,
-            length: std::fs::metadata(&file_path).unwrap().len(),
+            length: std::fs::metadata(file_path).unwrap().len(),
             record_count: None,
-            data_file_path: file_path,
+            data_file_path: file_path.to_string(),
             data_file_format: DataFileFormat::Parquet,
-            schema: schema.clone(),
-            project_field_ids: vec![1, 2],
+            schema,
+            project_field_ids,
             predicate: None,
             deletes: vec![],
             partition: None,
@@ -4952,17 +4934,10 @@ message schema {
         };
 
         let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream;
-        let batches: Vec<RecordBatch> = reader
-            .read(tasks)
-            .unwrap()
-            .try_collect()
-            .await
-            .unwrap();
+        let batches: Vec<RecordBatch> = reader.read(tasks).unwrap().try_collect().await.unwrap();
 
         assert_eq!(batches.len(), 1);
-        let batch = &batches[0];
-
-        let ts_array = batch
+        let ts_array = batches[0]
             .column(0)
             .as_any()
             .downcast_ref::<TimestampMicrosecondArray>()
@@ -4972,18 +4947,15 @@ message schema {
             assert_eq!(
                 ts_array.value(i),
                 *expected,
-                "Mismatch at row {i}: got {}, expected {expected}",
+                "Row {i}: got {}, expected {expected}",
                 ts_array.value(i)
             );
         }
     }
 
-    /// Test reading INT96 timestamps from migrated files WITHOUT field IDs
-    /// (branches 2/3 of schema resolution — the common migration case).
+    /// Files with embedded field IDs (branch 1 of schema resolution).
     #[tokio::test]
-    async fn test_read_int96_timestamps_without_field_ids() {
-        use arrow_array::TimestampMicrosecondArray;
-
+    async fn test_read_int96_timestamps_with_field_ids() {
         let schema = Arc::new(
             Schema::builder()
                 .with_schema_id(1)
@@ -4998,63 +4970,37 @@ message schema {
 
         let tmp_dir = TempDir::new().unwrap();
         let table_location = tmp_dir.path().to_str().unwrap().to_string();
-
         let (file_path, expected_micros) =
-            write_int96_parquet_file(&table_location, "no_ids.parquet", false);
-
-        let file_io = FileIO::new_with_fs();
-        let reader = ArrowReaderBuilder::new(file_io).build();
-
-        let task = FileScanTask {
-            file_size_in_bytes: std::fs::metadata(&file_path).unwrap().len(),
-            start: 0,
-            length: std::fs::metadata(&file_path).unwrap().len(),
-            record_count: None,
-            data_file_path: file_path,
-            data_file_format: DataFileFormat::Parquet,
-            schema: schema.clone(),
-            project_field_ids: vec![1, 2],
-            predicate: None,
-            deletes: vec![],
-            partition: None,
-            partition_spec: None,
-            name_mapping: None,
-            case_sensitive: false,
-        };
+            write_int96_parquet_file(&table_location, "with_ids.parquet", true);
 
-        let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream;
-        let batches: Vec<RecordBatch> = reader
-            .read(tasks)
-            .unwrap()
-            .try_collect()
-            .await
-            .unwrap();
+        assert_int96_read_matches(&file_path, schema, vec![1, 2], &expected_micros).await;
+    }
 
-        assert_eq!(batches.len(), 1);
-        let batch = &batches[0];
+    /// Migrated files without field IDs (branches 2/3 of schema resolution).
+    #[tokio::test]
+    async fn test_read_int96_timestamps_without_field_ids() {
+        let schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::Timestamp))
+                        .into(),
+                    NestedField::required(2, "id", Type::Primitive(PrimitiveType::Int)).into(),
+                ])
+                .build()
+                .unwrap(),
+        );
 
-        let ts_array = batch
-            .column(0)
-            .as_any()
-            .downcast_ref::<TimestampMicrosecondArray>()
-            .expect("Expected TimestampMicrosecondArray");
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let (file_path, expected_micros) =
+            write_int96_parquet_file(&table_location, "no_ids.parquet", false);
 
-        for (i, expected) in expected_micros.iter().enumerate() {
-            assert_eq!(
-                ts_array.value(i),
-                *expected,
-                "Mismatch at row {i}: got {}, expected {expected}",
-                ts_array.value(i)
-            );
-        }
+        assert_int96_read_matches(&file_path, schema, vec![1, 2], &expected_micros).await;
     }
 
     /// Test reading INT96 timestamps inside a struct field.
-    // TODO: Add test_read_int96_timestamps_in_struct once RecordBatchTransformer
-    // supports struct fields written by SerializedFileWriter. The coerce_int96_timestamps
-    // function already handles nested types via recursive traversal.
     #[tokio::test]
-    #[ignore = "RecordBatchTransformer struct field matching needs work for this test setup"]
     async fn test_read_int96_timestamps_in_struct() {
         use arrow_array::TimestampMicrosecondArray;
         use parquet::basic::{Repetition, Type as PhysicalType};
@@ -5066,8 +5012,6 @@ message schema {
         let table_location = tmp_dir.path().to_str().unwrap().to_string();
         let file_path = format!("{table_location}/struct_int96.parquet");
 
-        // Parquet schema: message schema { required group data { optional INT96 ts; } }
-        // With field IDs: data=1, ts=2
         let ts_type = SchemaType::primitive_type_builder("ts", PhysicalType::INT96)
             .with_repetition(Repetition::OPTIONAL)
             .with_id(Some(2))
@@ -5086,11 +5030,13 @@ message schema {
             .build()
             .unwrap();
 
-        // Write INT96 value: year 3333 (Julian day 2_953_529), noon
+        // Year 3333 (Julian day 2_953_529), noon — outside the i64 nanosecond range
+        const UNIX_EPOCH_JULIAN: i64 = 2_440_588;
+        const MICROS_PER_DAY: i64 = 86_400_000_000;
         let nanos_within_day = 43_200_000_000_000u64;
         let julian_day = 2_953_529u32;
-        let expected_micros =
-            (julian_day as i64 - 2_440_588) * 86_400_000_000 + (nanos_within_day / 1_000) as i64;
+        let expected_micros = (julian_day as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY
+            + (nanos_within_day / 1_000) as i64;
 
         let mut int96_val = parquet::data_type::Int96::new();
         int96_val.set_data(
@@ -5114,19 +5060,24 @@ message schema {
         row_group.close().unwrap();
         writer.close().unwrap();
 
-        // Iceberg schema: struct<ts: timestamp>
         let iceberg_schema = Arc::new(
             Schema::builder()
                 .with_schema_id(1)
-                .with_fields(vec![NestedField::required(
-                    1,
-                    "data",
-                    Type::Struct(crate::spec::StructType::new(vec![
-                        NestedField::optional(2, "ts", Type::Primitive(PrimitiveType::Timestamp))
+                .with_fields(vec![
+                    NestedField::required(
+                        1,
+                        "data",
+                        Type::Struct(crate::spec::StructType::new(vec![
+                            NestedField::optional(
+                                2,
+                                "ts",
+                                Type::Primitive(PrimitiveType::Timestamp),
+                            )
                             .into(),
-                    ])),
-                )
-                .into()])
+                        ])),
+                    )
+                    .into(),
+                ])
                 .build()
                 .unwrap(),
         );
@@ -5134,6 +5085,7 @@ message schema {
         let file_io = FileIO::new_with_fs();
         let reader = ArrowReaderBuilder::new(file_io).build();
 
+        // Only project the top-level struct field; nested fields are included implicitly
         let task = FileScanTask {
             file_size_in_bytes: std::fs::metadata(&file_path).unwrap().len(),
             start: 0,
@@ -5142,7 +5094,7 @@ message schema {
             data_file_path: file_path,
             data_file_format: DataFileFormat::Parquet,
             schema: iceberg_schema,
-            project_field_ids: vec![1, 2],
+            project_field_ids: vec![1],
             predicate: None,
             deletes: vec![],
             partition: None,
@@ -5152,17 +5104,11 @@ message schema {
         };
 
         let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream;
-        let batches: Vec<RecordBatch> = reader
-            .read(tasks)
-            .unwrap()
-            .try_collect()
-            .await
-            .unwrap();
+        let batches: Vec<RecordBatch> = reader.read(tasks).unwrap().try_collect().await.unwrap();
 
         assert_eq!(batches.len(), 1);
         let batch = &batches[0];
 
-        // Navigate into the struct to get the timestamp array
         let struct_array = batch
             .column(0)
             .as_any()

From 19df205ebbcd60cf3b235905a42511fad6cd7e18 Mon Sep 17 00:00:00 2001
From: Matt Butrovich <mbutrovich@gmail.com>
Date: Tue, 31 Mar 2026 14:37:51 -0400
Subject: [PATCH 3/8] address PR feedback, add more tests

---
 crates/iceberg/src/arrow/reader.rs | 624 +++++++++++++++++++++--------
 1 file changed, 460 insertions(+), 164 deletions(-)

diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs
index 0aee3ec5ed..89bd2168f6 100644
--- a/crates/iceberg/src/arrow/reader.rs
+++ b/crates/iceberg/src/arrow/reader.rs
@@ -27,7 +27,8 @@ use arrow_array::{Array, ArrayRef, BooleanArray, Datum as ArrowDatum, RecordBatc
 use arrow_cast::cast::cast;
 use arrow_ord::cmp::{eq, gt, gt_eq, lt, lt_eq, neq};
 use arrow_schema::{
-    ArrowError, DataType, FieldRef, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef,
+    ArrowError, DataType, Field, FieldRef, Fields, Schema as ArrowSchema,
+    SchemaRef as ArrowSchemaRef, TimeUnit,
 };
 use arrow_string::like::starts_with;
 use bytes::Bytes;
@@ -39,10 +40,11 @@ use parquet::arrow::arrow_reader::{
 };
 use parquet::arrow::async_reader::AsyncFileReader;
 use parquet::arrow::{PARQUET_FIELD_ID_META_KEY, ParquetRecordBatchStreamBuilder, ProjectionMask};
+use parquet::basic::Type as PhysicalType;
 use parquet::file::metadata::{
     PageIndexPolicy, ParquetMetaData, ParquetMetaDataReader, RowGroupMetaData,
 };
-use parquet::schema::types::{SchemaDescriptor, Type as ParquetType};
+use parquet::schema::types::{ColumnPath, SchemaDescriptor, Type as ParquetType};
 use typed_builder::TypedBuilder;
 
 use crate::arrow::caching_delete_file_loader::CachingDeleteFileLoader;
@@ -393,12 +395,14 @@ impl ArrowReader {
             arrow_metadata.schema(),
             &task.schema,
         ) {
-            let options = ArrowReaderOptions::new().with_schema(coerced_schema);
+            let options = ArrowReaderOptions::new().with_schema(Arc::clone(&coerced_schema));
             ArrowReaderMetadata::try_new(Arc::clone(arrow_metadata.metadata()), options).map_err(
                 |e| {
                     Error::new(
                         ErrorKind::Unexpected,
-                        "Failed to create ArrowReaderMetadata with INT96-coerced schema",
+                        format!(
+                            "Failed to create ArrowReaderMetadata with INT96-coerced schema: {coerced_schema}"
+                        ),
                     )
                     .with_source(e)
                 },
@@ -1270,101 +1274,182 @@ fn coerce_int96_timestamps(
     arrow_schema: &ArrowSchemaRef,
     iceberg_schema: &Schema,
 ) -> Option<Arc<ArrowSchema>> {
-    use arrow_schema::{DataType, Field, Fields, TimeUnit};
-    use parquet::basic::Type as PhysicalType;
-
-    let int96_paths: HashSet<String> = parquet_schema
+    let int96_paths: Vec<ColumnPath> = parquet_schema
         .columns()
         .iter()
         .filter(|col| col.physical_type() == PhysicalType::INT96)
-        .map(|col| col.path().string())
+        .map(|col| col.path().clone())
         .collect();
 
     if int96_paths.is_empty() {
         return None;
     }
 
-    fn coerce_field(
-        field: &FieldRef,
-        path_parts: &[&str],
-        int96_paths: &HashSet<String>,
-        iceberg_schema: &Schema,
-    ) -> FieldRef {
-        match field.data_type() {
-            DataType::Struct(fields) => {
-                let new_fields: Vec<FieldRef> = fields
-                    .iter()
-                    .map(|child| {
-                        let mut child_path = path_parts.to_vec();
-                        child_path.push(child.name().as_str());
-                        coerce_field(child, &child_path, int96_paths, iceberg_schema)
-                    })
-                    .collect();
-                Arc::new(
-                    Field::new(
-                        field.name(),
-                        DataType::Struct(Fields::from(new_fields)),
-                        field.is_nullable(),
-                    )
-                    .with_metadata(field.metadata().clone()),
-                )
+    let mut fields: Vec<FieldRef> = arrow_schema.fields().iter().cloned().collect();
+    let mut any_changed = false;
+
+    for path in &int96_paths {
+        let parts = path.parts();
+        if let Some(idx) = fields.iter().position(|f| f.name() == &parts[0]) {
+            let (new_field, changed) = coerce_field_at_path(&fields[idx], parts, 0, iceberg_schema);
+            if changed {
+                fields[idx] = new_field;
+                any_changed = true;
             }
-            DataType::Timestamp(TimeUnit::Nanosecond, tz)
-                if int96_paths.contains(&path_parts.join(".")) =>
-            {
-                let target_unit = field
-                    .metadata()
-                    .get(PARQUET_FIELD_ID_META_KEY)
-                    .and_then(|id_str| id_str.parse::<i32>().ok())
-                    .and_then(|field_id| iceberg_schema.field_by_id(field_id))
-                    .and_then(|f| match &*f.field_type {
-                        Type::Primitive(PrimitiveType::Timestamp | PrimitiveType::Timestamptz) => {
-                            Some(TimeUnit::Microsecond)
-                        }
-                        Type::Primitive(
-                            PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs,
-                        ) => Some(TimeUnit::Nanosecond),
-                        _ => None,
-                    })
-                    // Iceberg Java reads INT96 as microseconds by default
-                    .unwrap_or(TimeUnit::Microsecond);
+        }
+    }
 
-                if target_unit == TimeUnit::Nanosecond {
-                    return Arc::clone(field);
+    if any_changed {
+        Some(Arc::new(ArrowSchema::new_with_metadata(
+            fields,
+            arrow_schema.metadata().clone(),
+        )))
+    } else {
+        None
+    }
+}
+
+/// Navigate an Arrow field tree using a Parquet column path and coerce the leaf
+/// INT96 `Timestamp(Nanosecond)` to the resolution specified by the Iceberg schema.
+///
+/// Parquet column paths include intermediate group names for nested types (e.g. the
+/// repeated group in LIST encoding). This function accounts for those extra levels
+/// when descending through List, LargeList, and Map Arrow types.
+fn coerce_field_at_path(
+    field: &FieldRef,
+    parquet_path: &[String],
+    depth: usize,
+    iceberg_schema: &Schema,
+) -> (FieldRef, bool) {
+    if depth == parquet_path.len() - 1 {
+        return coerce_timestamp_field(field, iceberg_schema);
+    }
+
+    match field.data_type() {
+        DataType::Struct(fields) => {
+            let child_name = &parquet_path[depth + 1];
+            let mut new_fields: Vec<FieldRef> = fields.iter().cloned().collect();
+            let mut changed = false;
+
+            if let Some(idx) = new_fields.iter().position(|f| f.name() == child_name) {
+                let (new_child, child_changed) =
+                    coerce_field_at_path(&new_fields[idx], parquet_path, depth + 1, iceberg_schema);
+                if child_changed {
+                    new_fields[idx] = new_child;
+                    changed = true;
                 }
+            }
 
-                Arc::new(
+            if changed {
+                let new_field = Arc::new(
                     Field::new(
                         field.name(),
-                        DataType::Timestamp(target_unit, tz.clone()),
+                        DataType::Struct(Fields::from(new_fields)),
                         field.is_nullable(),
                     )
                     .with_metadata(field.metadata().clone()),
-                )
+                );
+                (new_field, true)
+            } else {
+                (Arc::clone(field), false)
             }
-            _ => Arc::clone(field),
         }
+        DataType::List(element_field) | DataType::LargeList(element_field) => {
+            // Parquet 3-level LIST encoding inserts a repeated group between the list
+            // and its element, e.g. `my_list.list.element` where `list` is the
+            // intermediate group. The group name varies across writers — the spec says
+            // "list" but legacy data uses "element", "array", `<parent>_tuple`, etc.
+            // We skip it (depth + 1) since we navigate by Parquet ColumnPath parts
+            // which contain the actual name from the file. See the Parquet LogicalTypes
+            // spec for the backward-compatibility rules:
+            // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists
+            if depth + 2 < parquet_path.len() {
+                debug_assert_eq!(
+                    element_field.name(),
+                    &parquet_path[depth + 2],
+                    "Arrow list element name '{}' does not match Parquet path segment '{}'",
+                    element_field.name(),
+                    &parquet_path[depth + 2]
+                );
+                let (new_element, changed) =
+                    coerce_field_at_path(element_field, parquet_path, depth + 2, iceberg_schema);
+                if changed {
+                    let new_type = match field.data_type() {
+                        DataType::List(_) => DataType::List(new_element),
+                        DataType::LargeList(_) => DataType::LargeList(new_element),
+                        _ => unreachable!(),
+                    };
+                    let new_field = Arc::new(
+                        Field::new(field.name(), new_type, field.is_nullable())
+                            .with_metadata(field.metadata().clone()),
+                    );
+                    return (new_field, true);
+                }
+            }
+            (Arc::clone(field), false)
+        }
+        DataType::Map(entries_field, sorted) => {
+            // The entries field is a struct containing key/value fields. The Parquet
+            // path segment at depth + 1 is the entries group name (e.g. "key_value"),
+            // which corresponds to the Arrow entries struct field.
+            if depth + 1 < parquet_path.len() {
+                let (new_entries, changed) =
+                    coerce_field_at_path(entries_field, parquet_path, depth + 1, iceberg_schema);
+                if changed {
+                    let new_field = Arc::new(
+                        Field::new(
+                            field.name(),
+                            DataType::Map(new_entries, *sorted),
+                            field.is_nullable(),
+                        )
+                        .with_metadata(field.metadata().clone()),
+                    );
+                    return (new_field, true);
+                }
+            }
+            (Arc::clone(field), false)
+        }
+        _ => (Arc::clone(field), false),
     }
+}
 
-    let coerced_fields: Vec<FieldRef> = arrow_schema
-        .fields()
-        .iter()
-        .map(|field| {
-            coerce_field(
-                field,
-                &[field.name().as_str()],
-                &int96_paths,
-                iceberg_schema,
-            )
-        })
-        .collect();
+/// Coerce a single `Timestamp(Nanosecond)` field to the resolution indicated by the
+/// Iceberg schema. Falls back to microsecond when field IDs are unavailable, matching
+/// Iceberg Java behavior.
+fn coerce_timestamp_field(field: &FieldRef, iceberg_schema: &Schema) -> (FieldRef, bool) {
+    if let DataType::Timestamp(TimeUnit::Nanosecond, tz) = field.data_type() {
+        let target_unit = field
+            .metadata()
+            .get(PARQUET_FIELD_ID_META_KEY)
+            .and_then(|id_str| id_str.parse::<i32>().ok())
+            .and_then(|field_id| iceberg_schema.field_by_id(field_id))
+            .and_then(|f| match &*f.field_type {
+                Type::Primitive(PrimitiveType::Timestamp | PrimitiveType::Timestamptz) => {
+                    Some(TimeUnit::Microsecond)
+                }
+                Type::Primitive(PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs) => {
+                    Some(TimeUnit::Nanosecond)
+                }
+                _ => None,
+            })
+            // Iceberg Java reads INT96 as microseconds by default
+            .unwrap_or(TimeUnit::Microsecond);
 
-    let coerced = ArrowSchema::new_with_metadata(coerced_fields, arrow_schema.metadata().clone());
+        if target_unit == TimeUnit::Nanosecond {
+            return (Arc::clone(field), false);
+        }
 
-    if &coerced != arrow_schema.as_ref() {
-        Some(Arc::new(coerced))
+        let new_field = Arc::new(
+            Field::new(
+                field.name(),
+                DataType::Timestamp(target_unit, tz.clone()),
+                field.is_nullable(),
+            )
+            .with_metadata(field.metadata().clone()),
+        );
+        (new_field, true)
     } else {
-        None
+        (Arc::clone(field), false)
     }
 }
 
@@ -2094,16 +2179,20 @@ mod tests {
     use std::sync::Arc;
 
     use arrow_array::cast::AsArray;
-    use arrow_array::{ArrayRef, LargeStringArray, RecordBatch, StringArray};
+    use arrow_array::{
+        ArrayRef, LargeStringArray, RecordBatch, StringArray, TimestampMicrosecondArray,
+    };
     use arrow_schema::{DataType, Field, Schema as ArrowSchema, TimeUnit};
     use futures::TryStreamExt;
     use parquet::arrow::arrow_reader::{RowSelection, RowSelector};
     use parquet::arrow::{ArrowWriter, ProjectionMask};
-    use parquet::basic::Compression;
+    use parquet::basic::{Compression, Repetition, Type as PhysicalType};
+    use parquet::data_type::{ByteArrayType, Int32Type, Int96, Int96Type};
     use parquet::file::metadata::{ColumnChunkMetaData, RowGroupMetaData};
     use parquet::file::properties::WriterProperties;
+    use parquet::file::writer::SerializedFileWriter;
     use parquet::schema::parser::parse_message_type;
-    use parquet::schema::types::{SchemaDescPtr, SchemaDescriptor};
+    use parquet::schema::types::{SchemaDescPtr, SchemaDescriptor, Type as SchemaType};
     use roaring::RoaringTreemap;
     use tempfile::TempDir;
 
@@ -4804,6 +4893,58 @@ message schema {
         assert_eq!(result[2], expected_2);
     }
 
+    // INT96 encoding: [nanos_low_u32, nanos_high_u32, julian_day_u32]
+    // Julian day 2_440_588 = Unix epoch (1970-01-01)
+    const UNIX_EPOCH_JULIAN: i64 = 2_440_588;
+    const MICROS_PER_DAY: i64 = 86_400_000_000;
+    /// Noon on 3333-01-01 (Julian day 2_953_529) — outside the i64 nanosecond range (~1677-2262).
+    const INT96_TEST_NANOS_WITHIN_DAY: u64 = 43_200_000_000_000;
+    const INT96_TEST_JULIAN_DAY: u32 = 2_953_529;
+
+    /// Build an INT96 value and its expected microsecond interpretation.
+    fn make_int96_test_value() -> (Int96, i64) {
+        let mut val = Int96::new();
+        val.set_data(
+            (INT96_TEST_NANOS_WITHIN_DAY & 0xFFFFFFFF) as u32,
+            (INT96_TEST_NANOS_WITHIN_DAY >> 32) as u32,
+            INT96_TEST_JULIAN_DAY,
+        );
+        let expected_micros = (INT96_TEST_JULIAN_DAY as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY
+            + (INT96_TEST_NANOS_WITHIN_DAY / 1_000) as i64;
+        (val, expected_micros)
+    }
+
+    /// Read a Parquet file through ArrowReader and return the resulting batches.
+    async fn read_int96_batches(
+        file_path: &str,
+        schema: SchemaRef,
+        project_field_ids: Vec<i32>,
+    ) -> Vec<RecordBatch> {
+        let file_io = FileIO::new_with_fs();
+        let reader = ArrowReaderBuilder::new(file_io).build();
+
+        let file_size = std::fs::metadata(file_path).unwrap().len();
+        let task = FileScanTask {
+            file_size_in_bytes: file_size,
+            start: 0,
+            length: file_size,
+            record_count: None,
+            data_file_path: file_path.to_string(),
+            data_file_format: DataFileFormat::Parquet,
+            schema,
+            project_field_ids,
+            predicate: None,
+            deletes: vec![],
+            partition: None,
+            partition_spec: None,
+            name_mapping: None,
+            case_sensitive: false,
+        };
+
+        let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream;
+        reader.read(tasks).unwrap().try_collect().await.unwrap()
+    }
+
     /// Writes a Parquet file with INT96 timestamps using SerializedFileWriter
     /// (ArrowWriter cannot write INT96). Returns (file_path, expected_microsecond_values).
     fn write_int96_parquet_file(
@@ -4811,11 +4952,6 @@ message schema {
         filename: &str,
         with_field_ids: bool,
     ) -> (String, Vec<i64>) {
-        use parquet::basic::{Repetition, Type as PhysicalType};
-        use parquet::data_type::{Int32Type, Int96, Int96Type};
-        use parquet::file::writer::SerializedFileWriter;
-        use parquet::schema::types::Type as SchemaType;
-
         let file_path = format!("{table_location}/{filename}");
 
         let mut ts_builder = SchemaType::primitive_type_builder("ts", PhysicalType::INT96)
@@ -4836,34 +4972,33 @@ message schema {
             .build()
             .unwrap();
 
-        // INT96 encoding: [nanos_low_u32, nanos_high_u32, julian_day_u32]
-        // Julian day 2_440_588 = Unix epoch (1970-01-01)
         // Dates outside the i64 nanosecond range (~1677-2262) overflow without coercion.
-        const UNIX_EPOCH_JULIAN: i64 = 2_440_588;
-        const MICROS_PER_DAY: i64 = 86_400_000_000;
-        const NOON_NANOS: u64 = 43_200_000_000_000;
+        const NOON_NANOS: u64 = INT96_TEST_NANOS_WITHIN_DAY;
+        const JULIAN_3333: u32 = INT96_TEST_JULIAN_DAY;
+        const JULIAN_2100: u32 = 2_488_070;
 
         let test_data: Vec<(u32, u32, u32, i64)> = vec![
             // 3333-01-01 00:00:00
             (
                 0,
                 0,
-                2_953_529,
-                (2_953_529 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY,
+                JULIAN_3333,
+                (JULIAN_3333 as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY,
             ),
             // 3333-01-01 12:00:00
             (
                 (NOON_NANOS & 0xFFFFFFFF) as u32,
                 (NOON_NANOS >> 32) as u32,
-                2_953_529,
-                (2_953_529 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY + (NOON_NANOS / 1_000) as i64,
+                JULIAN_3333,
+                (JULIAN_3333 as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY
+                    + (NOON_NANOS / 1_000) as i64,
             ),
             // 2100-01-01 00:00:00
             (
                 0,
                 0,
-                2_488_070,
-                (2_488_070 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY,
+                JULIAN_2100,
+                (JULIAN_2100 as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY,
             ),
         ];
 
@@ -4904,37 +5039,14 @@ message schema {
         (file_path, expected_micros)
     }
 
-    /// Read INT96 Parquet file through ArrowReader and verify microsecond timestamps.
+    /// Read INT96 Parquet file through ArrowReader and verify top-level microsecond timestamps.
     async fn assert_int96_read_matches(
         file_path: &str,
         schema: SchemaRef,
         project_field_ids: Vec<i32>,
         expected_micros: &[i64],
     ) {
-        use arrow_array::TimestampMicrosecondArray;
-
-        let file_io = FileIO::new_with_fs();
-        let reader = ArrowReaderBuilder::new(file_io).build();
-
-        let task = FileScanTask {
-            file_size_in_bytes: std::fs::metadata(file_path).unwrap().len(),
-            start: 0,
-            length: std::fs::metadata(file_path).unwrap().len(),
-            record_count: None,
-            data_file_path: file_path.to_string(),
-            data_file_format: DataFileFormat::Parquet,
-            schema,
-            project_field_ids,
-            predicate: None,
-            deletes: vec![],
-            partition: None,
-            partition_spec: None,
-            name_mapping: None,
-            case_sensitive: false,
-        };
-
-        let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream;
-        let batches: Vec<RecordBatch> = reader.read(tasks).unwrap().try_collect().await.unwrap();
+        let batches = read_int96_batches(file_path, schema, project_field_ids).await;
 
         assert_eq!(batches.len(), 1);
         let ts_array = batches[0]
@@ -5002,12 +5114,6 @@ message schema {
     /// Test reading INT96 timestamps inside a struct field.
     #[tokio::test]
     async fn test_read_int96_timestamps_in_struct() {
-        use arrow_array::TimestampMicrosecondArray;
-        use parquet::basic::{Repetition, Type as PhysicalType};
-        use parquet::data_type::Int96Type;
-        use parquet::file::writer::SerializedFileWriter;
-        use parquet::schema::types::Type as SchemaType;
-
         let tmp_dir = TempDir::new().unwrap();
         let table_location = tmp_dir.path().to_str().unwrap().to_string();
         let file_path = format!("{table_location}/struct_int96.parquet");
@@ -5030,20 +5136,7 @@ message schema {
             .build()
             .unwrap();
 
-        // Year 3333 (Julian day 2_953_529), noon — outside the i64 nanosecond range
-        const UNIX_EPOCH_JULIAN: i64 = 2_440_588;
-        const MICROS_PER_DAY: i64 = 86_400_000_000;
-        let nanos_within_day = 43_200_000_000_000u64;
-        let julian_day = 2_953_529u32;
-        let expected_micros = (julian_day as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY
-            + (nanos_within_day / 1_000) as i64;
-
-        let mut int96_val = parquet::data_type::Int96::new();
-        int96_val.set_data(
-            (nanos_within_day & 0xFFFFFFFF) as u32,
-            (nanos_within_day >> 32) as u32,
-            julian_day,
-        );
+        let (int96_val, expected_micros) = make_int96_test_value();
 
         let file = File::create(&file_path).unwrap();
         let mut writer =
@@ -5082,34 +5175,10 @@ message schema {
                 .unwrap(),
         );
 
-        let file_io = FileIO::new_with_fs();
-        let reader = ArrowReaderBuilder::new(file_io).build();
-
-        // Only project the top-level struct field; nested fields are included implicitly
-        let task = FileScanTask {
-            file_size_in_bytes: std::fs::metadata(&file_path).unwrap().len(),
-            start: 0,
-            length: std::fs::metadata(&file_path).unwrap().len(),
-            record_count: None,
-            data_file_path: file_path,
-            data_file_format: DataFileFormat::Parquet,
-            schema: iceberg_schema,
-            project_field_ids: vec![1],
-            predicate: None,
-            deletes: vec![],
-            partition: None,
-            partition_spec: None,
-            name_mapping: None,
-            case_sensitive: false,
-        };
-
-        let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream;
-        let batches: Vec<RecordBatch> = reader.read(tasks).unwrap().try_collect().await.unwrap();
+        let batches = read_int96_batches(&file_path, iceberg_schema, vec![1]).await;
 
         assert_eq!(batches.len(), 1);
-        let batch = &batches[0];
-
-        let struct_array = batch
+        let struct_array = batches[0]
             .column(0)
             .as_any()
             .downcast_ref::<arrow_array::StructArray>()
@@ -5127,4 +5196,231 @@ message schema {
             ts_array.value(0)
         );
     }
+
+    /// Test reading INT96 timestamps inside a list field (3-level Parquet LIST encoding).
+    #[tokio::test]
+    async fn test_read_int96_timestamps_in_list() {
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_path = format!("{table_location}/list_int96.parquet");
+
+        // 3-level LIST encoding:
+        //   optional group timestamps (LIST) {
+        //     repeated group list {
+        //       optional int96 element;
+        //     }
+        //   }
+        let element_type = SchemaType::primitive_type_builder("element", PhysicalType::INT96)
+            .with_repetition(Repetition::OPTIONAL)
+            .with_id(Some(2))
+            .build()
+            .unwrap();
+
+        let list_group = SchemaType::group_type_builder("list")
+            .with_repetition(Repetition::REPEATED)
+            .with_fields(vec![Arc::new(element_type)])
+            .build()
+            .unwrap();
+
+        let list_type = SchemaType::group_type_builder("timestamps")
+            .with_repetition(Repetition::OPTIONAL)
+            .with_id(Some(1))
+            .with_logical_type(Some(parquet::basic::LogicalType::List))
+            .with_fields(vec![Arc::new(list_group)])
+            .build()
+            .unwrap();
+
+        let parquet_schema = SchemaType::group_type_builder("schema")
+            .with_fields(vec![Arc::new(list_type)])
+            .build()
+            .unwrap();
+
+        let (int96_val, expected_micros) = make_int96_test_value();
+
+        let file = File::create(&file_path).unwrap();
+        let mut writer =
+            SerializedFileWriter::new(file, Arc::new(parquet_schema), Default::default()).unwrap();
+
+        // Write a single row with a list containing one INT96 element
+        let mut row_group = writer.next_row_group().unwrap();
+        {
+            let mut col = row_group.next_column().unwrap().unwrap();
+            col.typed::<Int96Type>()
+                .write_batch(&[int96_val], Some(&[3i16]), Some(&[0i16]))
+                .unwrap();
+            col.close().unwrap();
+        }
+        row_group.close().unwrap();
+        writer.close().unwrap();
+
+        let iceberg_schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::optional(
+                        1,
+                        "timestamps",
+                        Type::List(crate::spec::ListType {
+                            element_field: NestedField::optional(
+                                2,
+                                "element",
+                                Type::Primitive(PrimitiveType::Timestamp),
+                            )
+                            .into(),
+                        }),
+                    )
+                    .into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let batches = read_int96_batches(&file_path, iceberg_schema, vec![1]).await;
+
+        assert_eq!(batches.len(), 1);
+        let list_array = batches[0]
+            .column(0)
+            .as_any()
+            .downcast_ref::<arrow_array::ListArray>()
+            .expect("Expected ListArray");
+        let ts_array = list_array
+            .values()
+            .as_any()
+            .downcast_ref::<TimestampMicrosecondArray>()
+            .expect("Expected TimestampMicrosecondArray inside list");
+
+        assert_eq!(
+            ts_array.value(0),
+            expected_micros,
+            "INT96 in list: got {}, expected {expected_micros}",
+            ts_array.value(0)
+        );
+    }
+
+    /// Test reading INT96 timestamps as map values.
+    #[tokio::test]
+    async fn test_read_int96_timestamps_in_map() {
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_path = format!("{table_location}/map_int96.parquet");
+
+        // MAP encoding:
+        //   optional group ts_map (MAP) {
+        //     repeated group key_value {
+        //       required binary key (UTF8);
+        //       optional int96 value;
+        //     }
+        //   }
+        let key_type = SchemaType::primitive_type_builder("key", PhysicalType::BYTE_ARRAY)
+            .with_repetition(Repetition::REQUIRED)
+            .with_logical_type(Some(parquet::basic::LogicalType::String))
+            .with_id(Some(2))
+            .build()
+            .unwrap();
+
+        let value_type = SchemaType::primitive_type_builder("value", PhysicalType::INT96)
+            .with_repetition(Repetition::OPTIONAL)
+            .with_id(Some(3))
+            .build()
+            .unwrap();
+
+        let key_value_group = SchemaType::group_type_builder("key_value")
+            .with_repetition(Repetition::REPEATED)
+            .with_fields(vec![Arc::new(key_type), Arc::new(value_type)])
+            .build()
+            .unwrap();
+
+        let map_type = SchemaType::group_type_builder("ts_map")
+            .with_repetition(Repetition::OPTIONAL)
+            .with_id(Some(1))
+            .with_logical_type(Some(parquet::basic::LogicalType::Map))
+            .with_fields(vec![Arc::new(key_value_group)])
+            .build()
+            .unwrap();
+
+        let parquet_schema = SchemaType::group_type_builder("schema")
+            .with_fields(vec![Arc::new(map_type)])
+            .build()
+            .unwrap();
+
+        let (int96_val, expected_micros) = make_int96_test_value();
+
+        let file = File::create(&file_path).unwrap();
+        let mut writer =
+            SerializedFileWriter::new(file, Arc::new(parquet_schema), Default::default()).unwrap();
+
+        // Write a single row with a map containing one key-value pair
+        let mut row_group = writer.next_row_group().unwrap();
+        {
+            // key column
+            let mut col = row_group.next_column().unwrap().unwrap();
+            col.typed::<ByteArrayType>()
+                .write_batch(
+                    &[parquet::data_type::ByteArray::from("event_time")],
+                    Some(&[2i16]),
+                    Some(&[0i16]),
+                )
+                .unwrap();
+            col.close().unwrap();
+        }
+        {
+            // value column (INT96)
+            let mut col = row_group.next_column().unwrap().unwrap();
+            col.typed::<Int96Type>()
+                .write_batch(&[int96_val], Some(&[3i16]), Some(&[0i16]))
+                .unwrap();
+            col.close().unwrap();
+        }
+        row_group.close().unwrap();
+        writer.close().unwrap();
+
+        let iceberg_schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::optional(
+                        1,
+                        "ts_map",
+                        Type::Map(crate::spec::MapType {
+                            key_field: NestedField::required(
+                                2,
+                                "key",
+                                Type::Primitive(PrimitiveType::String),
+                            )
+                            .into(),
+                            value_field: NestedField::optional(
+                                3,
+                                "value",
+                                Type::Primitive(PrimitiveType::Timestamp),
+                            )
+                            .into(),
+                        }),
+                    )
+                    .into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let batches = read_int96_batches(&file_path, iceberg_schema, vec![1]).await;
+
+        assert_eq!(batches.len(), 1);
+        let map_array = batches[0]
+            .column(0)
+            .as_any()
+            .downcast_ref::<arrow_array::MapArray>()
+            .expect("Expected MapArray");
+        let ts_array = map_array
+            .values()
+            .as_any()
+            .downcast_ref::<TimestampMicrosecondArray>()
+            .expect("Expected TimestampMicrosecondArray as map values");
+
+        assert_eq!(
+            ts_array.value(0),
+            expected_micros,
+            "INT96 in map: got {}, expected {expected_micros}",
+            ts_array.value(0)
+        );
+    }
 }

From 63b2260bb8b01ed41f7e2a1a9c0867dbe4aa9571 Mon Sep 17 00:00:00 2001
From: Matt Butrovich <mbutrovich@gmail.com>
Date: Tue, 31 Mar 2026 14:42:07 -0400
Subject: [PATCH 4/8] clean up comments

---
 crates/iceberg/src/arrow/reader.rs | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs
index 89bd2168f6..a4be7090e2 100644
--- a/crates/iceberg/src/arrow/reader.rs
+++ b/crates/iceberg/src/arrow/reader.rs
@@ -5020,9 +5020,10 @@ message schema {
 
         let mut row_group = writer.next_row_group().unwrap();
         {
+            // def=1: ts is OPTIONAL and present. No repetition levels (top-level columns).
             let mut col = row_group.next_column().unwrap().unwrap();
             col.typed::<Int96Type>()
-                .write_batch(&int96_values, Some(&vec![1i16; test_data.len()]), None)
+                .write_batch(&int96_values, Some(&vec![1; test_data.len()]), None)
                 .unwrap();
             col.close().unwrap();
         }
@@ -5142,11 +5143,13 @@ message schema {
         let mut writer =
             SerializedFileWriter::new(file, Arc::new(parquet_schema), Default::default()).unwrap();
 
+        // def=1: struct is REQUIRED so no level, ts is OPTIONAL and present (1).
+        // No repetition levels needed (no repeated groups).
         let mut row_group = writer.next_row_group().unwrap();
         {
             let mut col = row_group.next_column().unwrap().unwrap();
             col.typed::<Int96Type>()
-                .write_batch(&[int96_val], Some(&[1i16]), None)
+                .write_batch(&[int96_val], Some(&[1]), None)
                 .unwrap();
             col.close().unwrap();
         }
@@ -5241,12 +5244,14 @@ message schema {
         let mut writer =
             SerializedFileWriter::new(file, Arc::new(parquet_schema), Default::default()).unwrap();
 
-        // Write a single row with a list containing one INT96 element
+        // Write a single row with a list containing one INT96 element.
+        // def=3: list present (1) + repeated group (2) + element present (3)
+        // rep=0: start of a new list
         let mut row_group = writer.next_row_group().unwrap();
         {
             let mut col = row_group.next_column().unwrap().unwrap();
             col.typed::<Int96Type>()
-                .write_batch(&[int96_val], Some(&[3i16]), Some(&[0i16]))
+                .write_batch(&[int96_val], Some(&[3]), Some(&[0]))
                 .unwrap();
             col.close().unwrap();
         }
@@ -5349,25 +5354,26 @@ message schema {
         let mut writer =
             SerializedFileWriter::new(file, Arc::new(parquet_schema), Default::default()).unwrap();
 
-        // Write a single row with a map containing one key-value pair
+        // Write a single row with a map containing one key-value pair.
+        // rep=0 for both columns: start of a new map.
+        // key def=2: map present (1) + key_value entry present (2), key is REQUIRED.
+        // value def=3: map present (1) + key_value entry present (2) + value present (3).
         let mut row_group = writer.next_row_group().unwrap();
         {
-            // key column
             let mut col = row_group.next_column().unwrap().unwrap();
             col.typed::<ByteArrayType>()
                 .write_batch(
                     &[parquet::data_type::ByteArray::from("event_time")],
-                    Some(&[2i16]),
-                    Some(&[0i16]),
+                    Some(&[2]),
+                    Some(&[0]),
                 )
                 .unwrap();
             col.close().unwrap();
         }
         {
-            // value column (INT96)
             let mut col = row_group.next_column().unwrap().unwrap();
             col.typed::<Int96Type>()
-                .write_batch(&[int96_val], Some(&[3i16]), Some(&[0i16]))
+                .write_batch(&[int96_val], Some(&[3]), Some(&[0]))
                 .unwrap();
             col.close().unwrap();
         }

From 516312289b534a731f6ffc3451c771fd09be832f Mon Sep 17 00:00:00 2001
From: Matt Butrovich <mbutrovich@gmail.com>
Date: Tue, 31 Mar 2026 16:33:25 -0400
Subject: [PATCH 5/8] remove error-prone debug_assert

---
 crates/iceberg/src/arrow/reader.rs | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs
index a4be7090e2..43b51c9f99 100644
--- a/crates/iceberg/src/arrow/reader.rs
+++ b/crates/iceberg/src/arrow/reader.rs
@@ -1364,13 +1364,6 @@ fn coerce_field_at_path(
             // spec for the backward-compatibility rules:
             // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists
             if depth + 2 < parquet_path.len() {
-                debug_assert_eq!(
-                    element_field.name(),
-                    &parquet_path[depth + 2],
-                    "Arrow list element name '{}' does not match Parquet path segment '{}'",
-                    element_field.name(),
-                    &parquet_path[depth + 2]
-                );
                 let (new_element, changed) =
                     coerce_field_at_path(element_field, parquet_path, depth + 2, iceberg_schema);
                 if changed {

From 7abe9e839b3619fd0ec6684a0a3311572b539b10 Mon Sep 17 00:00:00 2001
From: Matt Butrovich <mbutrovich@gmail.com>
Date: Wed, 1 Apr 2026 11:11:40 -0400
Subject: [PATCH 6/8] address PR feedback, add comments.

---
 crates/iceberg/src/arrow/reader.rs | 306 +++++++++++++++--------------
 crates/iceberg/src/arrow/schema.rs |   5 +-
 2 files changed, 160 insertions(+), 151 deletions(-)

diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs
index 43b51c9f99..c42460df1a 100644
--- a/crates/iceberg/src/arrow/reader.rs
+++ b/crates/iceberg/src/arrow/reader.rs
@@ -40,15 +40,15 @@ use parquet::arrow::arrow_reader::{
 };
 use parquet::arrow::async_reader::AsyncFileReader;
 use parquet::arrow::{PARQUET_FIELD_ID_META_KEY, ParquetRecordBatchStreamBuilder, ProjectionMask};
-use parquet::basic::Type as PhysicalType;
 use parquet::file::metadata::{
     PageIndexPolicy, ParquetMetaData, ParquetMetaDataReader, RowGroupMetaData,
 };
-use parquet::schema::types::{ColumnPath, SchemaDescriptor, Type as ParquetType};
+use parquet::schema::types::{SchemaDescriptor, Type as ParquetType};
 use typed_builder::TypedBuilder;
 
 use crate::arrow::caching_delete_file_loader::CachingDeleteFileLoader;
 use crate::arrow::record_batch_transformer::RecordBatchTransformerBuilder;
+use crate::arrow::schema::{ArrowSchemaVisitor, DEFAULT_MAP_FIELD_NAME, visit_schema};
 use crate::arrow::{arrow_schema_to_schema, get_arrow_datum};
 use crate::delete_vector::DeleteVector;
 use crate::error::Result;
@@ -390,11 +390,9 @@ impl ArrowReader {
 
         // Coerce INT96 timestamp columns to the resolution specified by the Iceberg schema.
         // This must happen before building the stream reader to avoid i64 overflow in arrow-rs.
-        let arrow_metadata = if let Some(coerced_schema) = coerce_int96_timestamps(
-            arrow_metadata.metadata().file_metadata().schema_descr(),
-            arrow_metadata.schema(),
-            &task.schema,
-        ) {
+        let arrow_metadata = if let Some(coerced_schema) =
+            coerce_int96_timestamps(arrow_metadata.schema(), &task.schema)
+        {
             let options = ArrowReaderOptions::new().with_schema(Arc::clone(&coerced_schema));
             ArrowReaderMetadata::try_new(Arc::clone(arrow_metadata.metadata()), options).map_err(
                 |e| {
@@ -1270,152 +1268,51 @@ fn add_fallback_field_ids_to_arrow_schema(arrow_schema: &ArrowSchemaRef) -> Arc<
 /// - Iceberg spec primitive types: <https://iceberg.apache.org/spec/#primitive-types>
 /// - arrow-rs schema hint support: <https://github.com/apache/arrow-rs/pull/7285>
 fn coerce_int96_timestamps(
-    parquet_schema: &SchemaDescriptor,
     arrow_schema: &ArrowSchemaRef,
     iceberg_schema: &Schema,
 ) -> Option<Arc<ArrowSchema>> {
-    let int96_paths: Vec<ColumnPath> = parquet_schema
-        .columns()
-        .iter()
-        .filter(|col| col.physical_type() == PhysicalType::INT96)
-        .map(|col| col.path().clone())
-        .collect();
-
-    if int96_paths.is_empty() {
-        return None;
-    }
-
-    let mut fields: Vec<FieldRef> = arrow_schema.fields().iter().cloned().collect();
-    let mut any_changed = false;
-
-    for path in &int96_paths {
-        let parts = path.parts();
-        if let Some(idx) = fields.iter().position(|f| f.name() == &parts[0]) {
-            let (new_field, changed) = coerce_field_at_path(&fields[idx], parts, 0, iceberg_schema);
-            if changed {
-                fields[idx] = new_field;
-                any_changed = true;
-            }
-        }
-    }
-
-    if any_changed {
-        Some(Arc::new(ArrowSchema::new_with_metadata(
-            fields,
-            arrow_schema.metadata().clone(),
-        )))
+    let mut visitor = Int96CoercionVisitor::new(iceberg_schema);
+    let coerced = visit_schema(arrow_schema, &mut visitor).ok()?;
+    if visitor.changed {
+        Some(Arc::new(coerced))
     } else {
         None
     }
 }
 
-/// Navigate an Arrow field tree using a Parquet column path and coerce the leaf
-/// INT96 `Timestamp(Nanosecond)` to the resolution specified by the Iceberg schema.
-///
-/// Parquet column paths include intermediate group names for nested types (e.g. the
-/// repeated group in LIST encoding). This function accounts for those extra levels
-/// when descending through List, LargeList, and Map Arrow types.
-fn coerce_field_at_path(
-    field: &FieldRef,
-    parquet_path: &[String],
-    depth: usize,
-    iceberg_schema: &Schema,
-) -> (FieldRef, bool) {
-    if depth == parquet_path.len() - 1 {
-        return coerce_timestamp_field(field, iceberg_schema);
-    }
-
-    match field.data_type() {
-        DataType::Struct(fields) => {
-            let child_name = &parquet_path[depth + 1];
-            let mut new_fields: Vec<FieldRef> = fields.iter().cloned().collect();
-            let mut changed = false;
-
-            if let Some(idx) = new_fields.iter().position(|f| f.name() == child_name) {
-                let (new_child, child_changed) =
-                    coerce_field_at_path(&new_fields[idx], parquet_path, depth + 1, iceberg_schema);
-                if child_changed {
-                    new_fields[idx] = new_child;
-                    changed = true;
-                }
-            }
+/// Visitor that coerces `Timestamp(Nanosecond)` Arrow fields to the resolution
+/// indicated by the Iceberg schema. Follows the same pattern as `MetadataStripVisitor`.
+struct Int96CoercionVisitor<'a> {
+    iceberg_schema: &'a Schema,
+    field_stack: Vec<Field>,
+    changed: bool,
+}
 
-            if changed {
-                let new_field = Arc::new(
-                    Field::new(
-                        field.name(),
-                        DataType::Struct(Fields::from(new_fields)),
-                        field.is_nullable(),
-                    )
-                    .with_metadata(field.metadata().clone()),
-                );
-                (new_field, true)
-            } else {
-                (Arc::clone(field), false)
-            }
-        }
-        DataType::List(element_field) | DataType::LargeList(element_field) => {
-            // Parquet 3-level LIST encoding inserts a repeated group between the list
-            // and its element, e.g. `my_list.list.element` where `list` is the
-            // intermediate group. The group name varies across writers — the spec says
-            // "list" but legacy data uses "element", "array", `<parent>_tuple`, etc.
-            // We skip it (depth + 1) since we navigate by Parquet ColumnPath parts
-            // which contain the actual name from the file. See the Parquet LogicalTypes
-            // spec for the backward-compatibility rules:
-            // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists
-            if depth + 2 < parquet_path.len() {
-                let (new_element, changed) =
-                    coerce_field_at_path(element_field, parquet_path, depth + 2, iceberg_schema);
-                if changed {
-                    let new_type = match field.data_type() {
-                        DataType::List(_) => DataType::List(new_element),
-                        DataType::LargeList(_) => DataType::LargeList(new_element),
-                        _ => unreachable!(),
-                    };
-                    let new_field = Arc::new(
-                        Field::new(field.name(), new_type, field.is_nullable())
-                            .with_metadata(field.metadata().clone()),
-                    );
-                    return (new_field, true);
-                }
-            }
-            (Arc::clone(field), false)
-        }
-        DataType::Map(entries_field, sorted) => {
-            // The entries field is a struct containing key/value fields. The Parquet
-            // path segment at depth + 1 is the entries group name (e.g. "key_value"),
-            // which corresponds to the Arrow entries struct field.
-            if depth + 1 < parquet_path.len() {
-                let (new_entries, changed) =
-                    coerce_field_at_path(entries_field, parquet_path, depth + 1, iceberg_schema);
-                if changed {
-                    let new_field = Arc::new(
-                        Field::new(
-                            field.name(),
-                            DataType::Map(new_entries, *sorted),
-                            field.is_nullable(),
-                        )
-                        .with_metadata(field.metadata().clone()),
-                    );
-                    return (new_field, true);
-                }
-            }
-            (Arc::clone(field), false)
+impl<'a> Int96CoercionVisitor<'a> {
+    fn new(iceberg_schema: &'a Schema) -> Self {
+        Self {
+            iceberg_schema,
+            field_stack: Vec::new(),
+            changed: false,
         }
-        _ => (Arc::clone(field), false),
     }
-}
 
-/// Coerce a single `Timestamp(Nanosecond)` field to the resolution indicated by the
-/// Iceberg schema. Falls back to microsecond when field IDs are unavailable, matching
-/// Iceberg Java behavior.
-fn coerce_timestamp_field(field: &FieldRef, iceberg_schema: &Schema) -> (FieldRef, bool) {
-    if let DataType::Timestamp(TimeUnit::Nanosecond, tz) = field.data_type() {
-        let target_unit = field
+    /// Determine the target TimeUnit for a Timestamp(Nanosecond) field based on the
+    /// Iceberg schema. Falls back to microsecond when field IDs are unavailable,
+    /// matching Iceberg Java behavior.
+    fn target_unit(&self, field: &Field) -> Option<TimeUnit> {
+        if !matches!(
+            field.data_type(),
+            DataType::Timestamp(TimeUnit::Nanosecond, _)
+        ) {
+            return None;
+        }
+
+        let target = field
             .metadata()
             .get(PARQUET_FIELD_ID_META_KEY)
             .and_then(|id_str| id_str.parse::<i32>().ok())
-            .and_then(|field_id| iceberg_schema.field_by_id(field_id))
+            .and_then(|field_id| self.iceberg_schema.field_by_id(field_id))
             .and_then(|f| match &*f.field_type {
                 Type::Primitive(PrimitiveType::Timestamp | PrimitiveType::Timestamptz) => {
                     Some(TimeUnit::Microsecond)
@@ -1428,21 +1325,130 @@ fn coerce_timestamp_field(field: &FieldRef, iceberg_schema: &Schema) -> (FieldRe
             // Iceberg Java reads INT96 as microseconds by default
             .unwrap_or(TimeUnit::Microsecond);
 
-        if target_unit == TimeUnit::Nanosecond {
-            return (Arc::clone(field), false);
+        if target == TimeUnit::Nanosecond {
+            None
+        } else {
+            Some(target)
         }
+    }
+}
 
-        let new_field = Arc::new(
-            Field::new(
-                field.name(),
-                DataType::Timestamp(target_unit, tz.clone()),
-                field.is_nullable(),
-            )
-            .with_metadata(field.metadata().clone()),
+impl ArrowSchemaVisitor for Int96CoercionVisitor<'_> {
+    type T = Field;
+    type U = ArrowSchema;
+
+    fn before_field(&mut self, field: &Field) -> Result<()> {
+        self.field_stack.push(field.as_ref().clone());
+        Ok(())
+    }
+
+    fn before_list_element(&mut self, field: &Field) -> Result<()> {
+        self.field_stack.push(field.as_ref().clone());
+        Ok(())
+    }
+
+    fn before_map_key(&mut self, field: &Field) -> Result<()> {
+        self.field_stack.push(field.as_ref().clone());
+        Ok(())
+    }
+
+    fn before_map_value(&mut self, field: &Field) -> Result<()> {
+        self.field_stack.push(field.as_ref().clone());
+        Ok(())
+    }
+
+    fn schema(&mut self, schema: &ArrowSchema, values: Vec<Self::T>) -> Result<Self::U> {
+        Ok(ArrowSchema::new_with_metadata(
+            values,
+            schema.metadata().clone(),
+        ))
+    }
+
+    fn r#struct(&mut self, _fields: &Fields, results: Vec<Self::T>) -> Result<Self::T> {
+        let field_info = self
+            .field_stack
+            .pop()
+            .ok_or_else(|| Error::new(ErrorKind::Unexpected, "Field stack underflow in struct"))?;
+        Ok(Field::new(
+            field_info.name(),
+            DataType::Struct(Fields::from(results)),
+            field_info.is_nullable(),
+        )
+        .with_metadata(field_info.metadata().clone()))
+    }
+
+    fn list(&mut self, list: &DataType, value: Self::T) -> Result<Self::T> {
+        let field_info = self
+            .field_stack
+            .pop()
+            .ok_or_else(|| Error::new(ErrorKind::Unexpected, "Field stack underflow in list"))?;
+        let list_type = match list {
+            DataType::List(_) => DataType::List(Arc::new(value)),
+            DataType::LargeList(_) => DataType::LargeList(Arc::new(value)),
+            DataType::FixedSizeList(_, size) => DataType::FixedSizeList(Arc::new(value), *size),
+            _ => {
+                return Err(Error::new(
+                    ErrorKind::Unexpected,
+                    format!("Expected list type, got {list}"),
+                ));
+            }
+        };
+        Ok(
+            Field::new(field_info.name(), list_type, field_info.is_nullable())
+                .with_metadata(field_info.metadata().clone()),
+        )
+    }
+
+    fn map(&mut self, map: &DataType, key_value: Self::T, value: Self::T) -> Result<Self::T> {
+        let field_info = self
+            .field_stack
+            .pop()
+            .ok_or_else(|| Error::new(ErrorKind::Unexpected, "Field stack underflow in map"))?;
+        let sorted = match map {
+            DataType::Map(_, sorted) => *sorted,
+            _ => {
+                return Err(Error::new(
+                    ErrorKind::Unexpected,
+                    format!("Expected map type, got {map}"),
+                ));
+            }
+        };
+        let struct_field = Field::new(
+            DEFAULT_MAP_FIELD_NAME,
+            DataType::Struct(Fields::from(vec![key_value, value])),
+            false,
         );
-        (new_field, true)
-    } else {
-        (Arc::clone(field), false)
+        Ok(Field::new(
+            field_info.name(),
+            DataType::Map(Arc::new(struct_field), sorted),
+            field_info.is_nullable(),
+        )
+        .with_metadata(field_info.metadata().clone()))
+    }
+
+    fn primitive(&mut self, p: &DataType) -> Result<Self::T> {
+        let field_info = self.field_stack.pop().ok_or_else(|| {
+            Error::new(ErrorKind::Unexpected, "Field stack underflow in primitive")
+        })?;
+
+        if let Some(target_unit) = self.target_unit(&field_info) {
+            let tz = match field_info.data_type() {
+                DataType::Timestamp(_, tz) => tz.clone(),
+                _ => None,
+            };
+            self.changed = true;
+            Ok(Field::new(
+                field_info.name(),
+                DataType::Timestamp(target_unit, tz),
+                field_info.is_nullable(),
+            )
+            .with_metadata(field_info.metadata().clone()))
+        } else {
+            Ok(
+                Field::new(field_info.name(), p.clone(), field_info.is_nullable())
+                    .with_metadata(field_info.metadata().clone()),
+            )
+        }
     }
 }
 
diff --git a/crates/iceberg/src/arrow/schema.rs b/crates/iceberg/src/arrow/schema.rs
index bd9e249f48..f96c29ab4a 100644
--- a/crates/iceberg/src/arrow/schema.rs
+++ b/crates/iceberg/src/arrow/schema.rs
@@ -199,7 +199,10 @@ fn visit_struct<V: ArrowSchemaVisitor>(fields: &Fields, visitor: &mut V) -> Resu
 }
 
 /// Visit schema in post order.
-fn visit_schema<V: ArrowSchemaVisitor>(schema: &ArrowSchema, visitor: &mut V) -> Result<V::U> {
+pub(crate) fn visit_schema<V: ArrowSchemaVisitor>(
+    schema: &ArrowSchema,
+    visitor: &mut V,
+) -> Result<V::U> {
     let mut results = Vec::with_capacity(schema.fields().len());
     for field in schema.fields() {
         visitor.before_field(field)?;

From 11f532057b5a15234f7bedcd7531103fa3222c6e Mon Sep 17 00:00:00 2001
From: Matt Butrovich <mbutrovich@gmail.com>
Date: Thu, 2 Apr 2026 13:47:23 -0400
Subject: [PATCH 7/8] address PR feedback.

---
 crates/iceberg/src/arrow/int96.rs  | 804 +++++++++++++++++++++++++++++
 crates/iceberg/src/arrow/mod.rs    |   1 +
 crates/iceberg/src/arrow/reader.rs | 749 +--------------------------
 3 files changed, 810 insertions(+), 744 deletions(-)
 create mode 100644 crates/iceberg/src/arrow/int96.rs

diff --git a/crates/iceberg/src/arrow/int96.rs b/crates/iceberg/src/arrow/int96.rs
new file mode 100644
index 0000000000..57516a63d9
--- /dev/null
+++ b/crates/iceberg/src/arrow/int96.rs
@@ -0,0 +1,804 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! INT96 timestamp coercion for Parquet files.
+
+use std::sync::Arc;
+
+use arrow_schema::{
+    DataType, Field, Fields, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, TimeUnit,
+};
+use parquet::arrow::PARQUET_FIELD_ID_META_KEY;
+
+use crate::arrow::schema::{ArrowSchemaVisitor, DEFAULT_MAP_FIELD_NAME, visit_schema};
+use crate::error::Result;
+use crate::spec::{PrimitiveType, Schema, Type};
+use crate::{Error, ErrorKind};
+
+/// Coerce Arrow schema types for INT96 columns to match the Iceberg table schema.
+///
+/// arrow-rs defaults INT96 to `Timestamp(Nanosecond)`, which overflows i64 for dates outside
+/// ~1677-2262. We use arrow-rs's schema hint mechanism to read INT96 at the resolution
+/// specified by the Iceberg schema (`timestamp` → microsecond, `timestamp_ns` → nanosecond).
+///
+/// Iceberg Java handles this differently: it bypasses parquet-mr with a custom column reader
+/// (`GenericParquetReaders.TimestampInt96Reader`). We achieve the same result via schema hints.
+///
+/// References:
+/// - Iceberg spec primitive types: <https://iceberg.apache.org/spec/#primitive-types>
+/// - arrow-rs schema hint support: <https://github.com/apache/arrow-rs/pull/7285>
+pub(crate) fn coerce_int96_timestamps(
+    arrow_schema: &ArrowSchemaRef,
+    iceberg_schema: &Schema,
+) -> Option<Arc<ArrowSchema>> {
+    let mut visitor = Int96CoercionVisitor::new(iceberg_schema);
+    let coerced = visit_schema(arrow_schema, &mut visitor).ok()?;
+    if visitor.changed {
+        Some(Arc::new(coerced))
+    } else {
+        None
+    }
+}
+
+/// Visitor that coerces `Timestamp(Nanosecond)` Arrow fields to the resolution
+/// indicated by the Iceberg schema.
+struct Int96CoercionVisitor<'a> {
+    iceberg_schema: &'a Schema,
+    // TODO(#2310): use FieldRef (Arc<Field>) once ArrowSchemaVisitor passes FieldRef.
+    field_stack: Vec<Field>,
+    changed: bool,
+}
+
+impl<'a> Int96CoercionVisitor<'a> {
+    fn new(iceberg_schema: &'a Schema) -> Self {
+        Self {
+            iceberg_schema,
+            field_stack: Vec::new(),
+            changed: false,
+        }
+    }
+
+    /// Determine the target TimeUnit for a Timestamp(Nanosecond) field based on the
+    /// Iceberg schema. Falls back to microsecond when field IDs are unavailable,
+    /// matching Iceberg Java behavior.
+    fn target_unit(&self, field: &Field) -> Option<TimeUnit> {
+        if !matches!(
+            field.data_type(),
+            DataType::Timestamp(TimeUnit::Nanosecond, _)
+        ) {
+            return None;
+        }
+
+        let target = field
+            .metadata()
+            .get(PARQUET_FIELD_ID_META_KEY)
+            .and_then(|id_str| id_str.parse::<i32>().ok())
+            .and_then(|field_id| self.iceberg_schema.field_by_id(field_id))
+            .and_then(|f| match &*f.field_type {
+                Type::Primitive(PrimitiveType::Timestamp | PrimitiveType::Timestamptz) => {
+                    Some(TimeUnit::Microsecond)
+                }
+                Type::Primitive(PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs) => {
+                    Some(TimeUnit::Nanosecond)
+                }
+                _ => None,
+            })
+            // Iceberg Java reads INT96 as microseconds by default
+            .unwrap_or(TimeUnit::Microsecond);
+
+        if target == TimeUnit::Nanosecond {
+            None
+        } else {
+            Some(target)
+        }
+    }
+}
+
+impl ArrowSchemaVisitor for Int96CoercionVisitor<'_> {
+    type T = Field;
+    type U = ArrowSchema;
+
+    fn before_field(&mut self, field: &Field) -> Result<()> {
+        self.field_stack.push(field.as_ref().clone());
+        Ok(())
+    }
+
+    fn after_field(&mut self, _field: &Field) -> Result<()> {
+        self.field_stack.pop();
+        Ok(())
+    }
+
+    fn before_list_element(&mut self, field: &Field) -> Result<()> {
+        self.field_stack.push(field.as_ref().clone());
+        Ok(())
+    }
+
+    fn after_list_element(&mut self, _field: &Field) -> Result<()> {
+        self.field_stack.pop();
+        Ok(())
+    }
+
+    fn before_map_key(&mut self, field: &Field) -> Result<()> {
+        self.field_stack.push(field.as_ref().clone());
+        Ok(())
+    }
+
+    fn after_map_key(&mut self, _field: &Field) -> Result<()> {
+        self.field_stack.pop();
+        Ok(())
+    }
+
+    fn before_map_value(&mut self, field: &Field) -> Result<()> {
+        self.field_stack.push(field.as_ref().clone());
+        Ok(())
+    }
+
+    fn after_map_value(&mut self, _field: &Field) -> Result<()> {
+        self.field_stack.pop();
+        Ok(())
+    }
+
+    fn schema(&mut self, schema: &ArrowSchema, values: Vec<Field>) -> Result<ArrowSchema> {
+        Ok(ArrowSchema::new_with_metadata(
+            values,
+            schema.metadata().clone(),
+        ))
+    }
+
+    fn r#struct(&mut self, _fields: &Fields, results: Vec<Field>) -> Result<Field> {
+        let field_info = self
+            .field_stack
+            .last()
+            .ok_or_else(|| Error::new(ErrorKind::Unexpected, "Field stack underflow in struct"))?;
+        Ok(Field::new(
+            field_info.name(),
+            DataType::Struct(Fields::from(results)),
+            field_info.is_nullable(),
+        )
+        .with_metadata(field_info.metadata().clone()))
+    }
+
+    fn list(&mut self, list: &DataType, value: Field) -> Result<Field> {
+        let field_info = self
+            .field_stack
+            .last()
+            .ok_or_else(|| Error::new(ErrorKind::Unexpected, "Field stack underflow in list"))?;
+        let list_type = match list {
+            DataType::List(_) => DataType::List(Arc::new(value)),
+            DataType::LargeList(_) => DataType::LargeList(Arc::new(value)),
+            DataType::FixedSizeList(_, size) => DataType::FixedSizeList(Arc::new(value), *size),
+            _ => {
+                return Err(Error::new(
+                    ErrorKind::Unexpected,
+                    format!("Expected list type, got {list}"),
+                ));
+            }
+        };
+        Ok(
+            Field::new(field_info.name(), list_type, field_info.is_nullable())
+                .with_metadata(field_info.metadata().clone()),
+        )
+    }
+
+    fn map(&mut self, map: &DataType, key_value: Field, value: Field) -> Result<Field> {
+        let field_info = self
+            .field_stack
+            .last()
+            .ok_or_else(|| Error::new(ErrorKind::Unexpected, "Field stack underflow in map"))?;
+        let sorted = match map {
+            DataType::Map(_, sorted) => *sorted,
+            _ => {
+                return Err(Error::new(
+                    ErrorKind::Unexpected,
+                    format!("Expected map type, got {map}"),
+                ));
+            }
+        };
+        let struct_field = Field::new(
+            DEFAULT_MAP_FIELD_NAME,
+            DataType::Struct(Fields::from(vec![key_value, value])),
+            false,
+        );
+        Ok(Field::new(
+            field_info.name(),
+            DataType::Map(Arc::new(struct_field), sorted),
+            field_info.is_nullable(),
+        )
+        .with_metadata(field_info.metadata().clone()))
+    }
+
+    fn primitive(&mut self, p: &DataType) -> Result<Field> {
+        let field_info = self.field_stack.last().ok_or_else(|| {
+            Error::new(ErrorKind::Unexpected, "Field stack underflow in primitive")
+        })?;
+
+        if let Some(target_unit) = self.target_unit(field_info) {
+            let tz = match field_info.data_type() {
+                DataType::Timestamp(_, tz) => tz.clone(),
+                _ => None,
+            };
+            self.changed = true;
+            Ok(Field::new(
+                field_info.name(),
+                DataType::Timestamp(target_unit, tz),
+                field_info.is_nullable(),
+            )
+            .with_metadata(field_info.metadata().clone()))
+        } else {
+            Ok(
+                Field::new(field_info.name(), p.clone(), field_info.is_nullable())
+                    .with_metadata(field_info.metadata().clone()),
+            )
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::fs::File;
+    use std::sync::Arc;
+
+    use arrow_array::TimestampMicrosecondArray;
+    use futures::TryStreamExt;
+    use parquet::basic::{Repetition, Type as PhysicalType};
+    use parquet::data_type::{ByteArrayType, Int32Type, Int96, Int96Type};
+    use parquet::file::writer::SerializedFileWriter;
+    use parquet::schema::types::Type as SchemaType;
+    use tempfile::TempDir;
+
+    use crate::arrow::ArrowReaderBuilder;
+    use crate::io::FileIO;
+    use crate::scan::{FileScanTask, FileScanTaskStream};
+    use crate::spec::{DataFileFormat, NestedField, PrimitiveType, Schema, SchemaRef, Type};
+
+    // INT96 encoding: [nanos_low_u32, nanos_high_u32, julian_day_u32]
+    // Julian day 2_440_588 = Unix epoch (1970-01-01)
+    const UNIX_EPOCH_JULIAN: i64 = 2_440_588;
+    const MICROS_PER_DAY: i64 = 86_400_000_000;
+    /// Noon on 3333-01-01 (Julian day 2_953_529) — outside the i64 nanosecond range (~1677-2262).
+    const INT96_TEST_NANOS_WITHIN_DAY: u64 = 43_200_000_000_000;
+    const INT96_TEST_JULIAN_DAY: u32 = 2_953_529;
+
+    /// Build an INT96 value and its expected microsecond interpretation.
+    fn make_int96_test_value() -> (Int96, i64) {
+        let mut val = Int96::new();
+        val.set_data(
+            (INT96_TEST_NANOS_WITHIN_DAY & 0xFFFFFFFF) as u32,
+            (INT96_TEST_NANOS_WITHIN_DAY >> 32) as u32,
+            INT96_TEST_JULIAN_DAY,
+        );
+        let expected_micros = (INT96_TEST_JULIAN_DAY as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY
+            + (INT96_TEST_NANOS_WITHIN_DAY / 1_000) as i64;
+        (val, expected_micros)
+    }
+
+    /// Read a Parquet file through ArrowReader and return the resulting batches.
+    async fn read_int96_batches(
+        file_path: &str,
+        schema: SchemaRef,
+        project_field_ids: Vec<i32>,
+    ) -> Vec<arrow_array::RecordBatch> {
+        let file_io = FileIO::new_with_fs();
+        let reader = ArrowReaderBuilder::new(file_io).build();
+
+        let file_size = std::fs::metadata(file_path).unwrap().len();
+        let task = FileScanTask {
+            file_size_in_bytes: file_size,
+            start: 0,
+            length: file_size,
+            record_count: None,
+            data_file_path: file_path.to_string(),
+            data_file_format: DataFileFormat::Parquet,
+            schema,
+            project_field_ids,
+            predicate: None,
+            deletes: vec![],
+            partition: None,
+            partition_spec: None,
+            name_mapping: None,
+            case_sensitive: false,
+        };
+
+        let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream;
+        reader.read(tasks).unwrap().try_collect().await.unwrap()
+    }
+
+    /// Writes a Parquet file with INT96 timestamps using SerializedFileWriter
+    /// (ArrowWriter cannot write INT96). Returns (file_path, expected_microsecond_values).
+    fn write_int96_parquet_file(
+        table_location: &str,
+        filename: &str,
+        with_field_ids: bool,
+    ) -> (String, Vec<i64>) {
+        let file_path = format!("{table_location}/{filename}");
+
+        let mut ts_builder = SchemaType::primitive_type_builder("ts", PhysicalType::INT96)
+            .with_repetition(Repetition::OPTIONAL);
+        let mut id_builder = SchemaType::primitive_type_builder("id", PhysicalType::INT32)
+            .with_repetition(Repetition::REQUIRED);
+
+        if with_field_ids {
+            ts_builder = ts_builder.with_id(Some(1));
+            id_builder = id_builder.with_id(Some(2));
+        }
+
+        let schema = SchemaType::group_type_builder("schema")
+            .with_fields(vec![
+                Arc::new(ts_builder.build().unwrap()),
+                Arc::new(id_builder.build().unwrap()),
+            ])
+            .build()
+            .unwrap();
+
+        // Dates outside the i64 nanosecond range (~1677-2262) overflow without coercion.
+        const NOON_NANOS: u64 = INT96_TEST_NANOS_WITHIN_DAY;
+        const JULIAN_3333: u32 = INT96_TEST_JULIAN_DAY;
+        const JULIAN_2100: u32 = 2_488_070;
+
+        let test_data: Vec<(u32, u32, u32, i64)> = vec![
+            // 3333-01-01 00:00:00
+            (
+                0,
+                0,
+                JULIAN_3333,
+                (JULIAN_3333 as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY,
+            ),
+            // 3333-01-01 12:00:00
+            (
+                (NOON_NANOS & 0xFFFFFFFF) as u32,
+                (NOON_NANOS >> 32) as u32,
+                JULIAN_3333,
+                (JULIAN_3333 as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY
+                    + (NOON_NANOS / 1_000) as i64,
+            ),
+            // 2100-01-01 00:00:00
+            (
+                0,
+                0,
+                JULIAN_2100,
+                (JULIAN_2100 as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY,
+            ),
+        ];
+
+        let int96_values: Vec<Int96> = test_data
+            .iter()
+            .map(|(lo, hi, day, _)| {
+                let mut v = Int96::new();
+                v.set_data(*lo, *hi, *day);
+                v
+            })
+            .collect();
+
+        let id_values: Vec<i32> = (0..test_data.len() as i32).collect();
+        let expected_micros: Vec<i64> = test_data.iter().map(|(_, _, _, m)| *m).collect();
+
+        let file = File::create(&file_path).unwrap();
+        let mut writer =
+            SerializedFileWriter::new(file, Arc::new(schema), Default::default()).unwrap();
+
+        let mut row_group = writer.next_row_group().unwrap();
+        {
+            // def=1: ts is OPTIONAL and present. No repetition levels (top-level columns).
+            let mut col = row_group.next_column().unwrap().unwrap();
+            col.typed::<Int96Type>()
+                .write_batch(&int96_values, Some(&vec![1; test_data.len()]), None)
+                .unwrap();
+            col.close().unwrap();
+        }
+        {
+            let mut col = row_group.next_column().unwrap().unwrap();
+            col.typed::<Int32Type>()
+                .write_batch(&id_values, None, None)
+                .unwrap();
+            col.close().unwrap();
+        }
+        row_group.close().unwrap();
+        writer.close().unwrap();
+
+        (file_path, expected_micros)
+    }
+
+    /// Read INT96 Parquet file through ArrowReader and verify top-level microsecond timestamps.
+    async fn assert_int96_read_matches(
+        file_path: &str,
+        schema: SchemaRef,
+        project_field_ids: Vec<i32>,
+        expected_micros: &[i64],
+    ) {
+        let batches = read_int96_batches(file_path, schema, project_field_ids).await;
+
+        assert_eq!(batches.len(), 1);
+        let ts_array = batches[0]
+            .column(0)
+            .as_any()
+            .downcast_ref::<TimestampMicrosecondArray>()
+            .expect("Expected TimestampMicrosecondArray");
+
+        for (i, expected) in expected_micros.iter().enumerate() {
+            assert_eq!(
+                ts_array.value(i),
+                *expected,
+                "Row {i}: got {}, expected {expected}",
+                ts_array.value(i)
+            );
+        }
+    }
+
+    /// Files with embedded field IDs (branch 1 of schema resolution).
+    #[tokio::test]
+    async fn test_read_int96_timestamps_with_field_ids() {
+        let schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::Timestamp))
+                        .into(),
+                    NestedField::required(2, "id", Type::Primitive(PrimitiveType::Int)).into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let (file_path, expected_micros) =
+            write_int96_parquet_file(&table_location, "with_ids.parquet", true);
+
+        assert_int96_read_matches(&file_path, schema, vec![1, 2], &expected_micros).await;
+    }
+
+    /// Migrated files without field IDs (branches 2/3 of schema resolution).
+    #[tokio::test]
+    async fn test_read_int96_timestamps_without_field_ids() {
+        let schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::Timestamp))
+                        .into(),
+                    NestedField::required(2, "id", Type::Primitive(PrimitiveType::Int)).into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let (file_path, expected_micros) =
+            write_int96_parquet_file(&table_location, "no_ids.parquet", false);
+
+        assert_int96_read_matches(&file_path, schema, vec![1, 2], &expected_micros).await;
+    }
+
+    /// Test reading INT96 timestamps inside a struct field.
+    #[tokio::test]
+    async fn test_read_int96_timestamps_in_struct() {
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_path = format!("{table_location}/struct_int96.parquet");
+
+        let ts_type = SchemaType::primitive_type_builder("ts", PhysicalType::INT96)
+            .with_repetition(Repetition::OPTIONAL)
+            .with_id(Some(2))
+            .build()
+            .unwrap();
+
+        let struct_type = SchemaType::group_type_builder("data")
+            .with_repetition(Repetition::REQUIRED)
+            .with_id(Some(1))
+            .with_fields(vec![Arc::new(ts_type)])
+            .build()
+            .unwrap();
+
+        let parquet_schema = SchemaType::group_type_builder("schema")
+            .with_fields(vec![Arc::new(struct_type)])
+            .build()
+            .unwrap();
+
+        let (int96_val, expected_micros) = make_int96_test_value();
+
+        let file = File::create(&file_path).unwrap();
+        let mut writer =
+            SerializedFileWriter::new(file, Arc::new(parquet_schema), Default::default()).unwrap();
+
+        // def=1: struct is REQUIRED so no level, ts is OPTIONAL and present (1).
+        // No repetition levels needed (no repeated groups).
+        let mut row_group = writer.next_row_group().unwrap();
+        {
+            let mut col = row_group.next_column().unwrap().unwrap();
+            col.typed::<Int96Type>()
+                .write_batch(&[int96_val], Some(&[1]), None)
+                .unwrap();
+            col.close().unwrap();
+        }
+        row_group.close().unwrap();
+        writer.close().unwrap();
+
+        let iceberg_schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::required(
+                        1,
+                        "data",
+                        Type::Struct(crate::spec::StructType::new(vec![
+                            NestedField::optional(
+                                2,
+                                "ts",
+                                Type::Primitive(PrimitiveType::Timestamp),
+                            )
+                            .into(),
+                        ])),
+                    )
+                    .into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let batches = read_int96_batches(&file_path, iceberg_schema, vec![1]).await;
+
+        assert_eq!(batches.len(), 1);
+        let struct_array = batches[0]
+            .column(0)
+            .as_any()
+            .downcast_ref::<arrow_array::StructArray>()
+            .expect("Expected StructArray");
+        let ts_array = struct_array
+            .column(0)
+            .as_any()
+            .downcast_ref::<TimestampMicrosecondArray>()
+            .expect("Expected TimestampMicrosecondArray inside struct");
+
+        assert_eq!(
+            ts_array.value(0),
+            expected_micros,
+            "INT96 in struct: got {}, expected {expected_micros}",
+            ts_array.value(0)
+        );
+    }
+
+    /// Test reading INT96 timestamps inside a list field (3-level Parquet LIST encoding).
+    #[tokio::test]
+    async fn test_read_int96_timestamps_in_list() {
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_path = format!("{table_location}/list_int96.parquet");
+
+        // 3-level LIST encoding:
+        //   optional group timestamps (LIST) {
+        //     repeated group list {
+        //       optional int96 element;
+        //     }
+        //   }
+        let element_type = SchemaType::primitive_type_builder("element", PhysicalType::INT96)
+            .with_repetition(Repetition::OPTIONAL)
+            .with_id(Some(2))
+            .build()
+            .unwrap();
+
+        let list_group = SchemaType::group_type_builder("list")
+            .with_repetition(Repetition::REPEATED)
+            .with_fields(vec![Arc::new(element_type)])
+            .build()
+            .unwrap();
+
+        let list_type = SchemaType::group_type_builder("timestamps")
+            .with_repetition(Repetition::OPTIONAL)
+            .with_id(Some(1))
+            .with_logical_type(Some(parquet::basic::LogicalType::List))
+            .with_fields(vec![Arc::new(list_group)])
+            .build()
+            .unwrap();
+
+        let parquet_schema = SchemaType::group_type_builder("schema")
+            .with_fields(vec![Arc::new(list_type)])
+            .build()
+            .unwrap();
+
+        let (int96_val, expected_micros) = make_int96_test_value();
+
+        let file = File::create(&file_path).unwrap();
+        let mut writer =
+            SerializedFileWriter::new(file, Arc::new(parquet_schema), Default::default()).unwrap();
+
+        // Write a single row with a list containing one INT96 element.
+        // def=3: list present (1) + repeated group (2) + element present (3)
+        // rep=0: start of a new list
+        let mut row_group = writer.next_row_group().unwrap();
+        {
+            let mut col = row_group.next_column().unwrap().unwrap();
+            col.typed::<Int96Type>()
+                .write_batch(&[int96_val], Some(&[3]), Some(&[0]))
+                .unwrap();
+            col.close().unwrap();
+        }
+        row_group.close().unwrap();
+        writer.close().unwrap();
+
+        let iceberg_schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::optional(
+                        1,
+                        "timestamps",
+                        Type::List(crate::spec::ListType {
+                            element_field: NestedField::optional(
+                                2,
+                                "element",
+                                Type::Primitive(PrimitiveType::Timestamp),
+                            )
+                            .into(),
+                        }),
+                    )
+                    .into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let batches = read_int96_batches(&file_path, iceberg_schema, vec![1]).await;
+
+        assert_eq!(batches.len(), 1);
+        let list_array = batches[0]
+            .column(0)
+            .as_any()
+            .downcast_ref::<arrow_array::ListArray>()
+            .expect("Expected ListArray");
+        let ts_array = list_array
+            .values()
+            .as_any()
+            .downcast_ref::<TimestampMicrosecondArray>()
+            .expect("Expected TimestampMicrosecondArray inside list");
+
+        assert_eq!(
+            ts_array.value(0),
+            expected_micros,
+            "INT96 in list: got {}, expected {expected_micros}",
+            ts_array.value(0)
+        );
+    }
+
+    /// Test reading INT96 timestamps as map values.
+    #[tokio::test]
+    async fn test_read_int96_timestamps_in_map() {
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_path = format!("{table_location}/map_int96.parquet");
+
+        // MAP encoding:
+        //   optional group ts_map (MAP) {
+        //     repeated group key_value {
+        //       required binary key (UTF8);
+        //       optional int96 value;
+        //     }
+        //   }
+        let key_type = SchemaType::primitive_type_builder("key", PhysicalType::BYTE_ARRAY)
+            .with_repetition(Repetition::REQUIRED)
+            .with_logical_type(Some(parquet::basic::LogicalType::String))
+            .with_id(Some(2))
+            .build()
+            .unwrap();
+
+        let value_type = SchemaType::primitive_type_builder("value", PhysicalType::INT96)
+            .with_repetition(Repetition::OPTIONAL)
+            .with_id(Some(3))
+            .build()
+            .unwrap();
+
+        let key_value_group = SchemaType::group_type_builder("key_value")
+            .with_repetition(Repetition::REPEATED)
+            .with_fields(vec![Arc::new(key_type), Arc::new(value_type)])
+            .build()
+            .unwrap();
+
+        let map_type = SchemaType::group_type_builder("ts_map")
+            .with_repetition(Repetition::OPTIONAL)
+            .with_id(Some(1))
+            .with_logical_type(Some(parquet::basic::LogicalType::Map))
+            .with_fields(vec![Arc::new(key_value_group)])
+            .build()
+            .unwrap();
+
+        let parquet_schema = SchemaType::group_type_builder("schema")
+            .with_fields(vec![Arc::new(map_type)])
+            .build()
+            .unwrap();
+
+        let (int96_val, expected_micros) = make_int96_test_value();
+
+        let file = File::create(&file_path).unwrap();
+        let mut writer =
+            SerializedFileWriter::new(file, Arc::new(parquet_schema), Default::default()).unwrap();
+
+        // Write a single row with a map containing one key-value pair.
+        // rep=0 for both columns: start of a new map.
+        // key def=2: map present (1) + key_value entry present (2), key is REQUIRED.
+        // value def=3: map present (1) + key_value entry present (2) + value present (3).
+        let mut row_group = writer.next_row_group().unwrap();
+        {
+            let mut col = row_group.next_column().unwrap().unwrap();
+            col.typed::<ByteArrayType>()
+                .write_batch(
+                    &[parquet::data_type::ByteArray::from("event_time")],
+                    Some(&[2]),
+                    Some(&[0]),
+                )
+                .unwrap();
+            col.close().unwrap();
+        }
+        {
+            let mut col = row_group.next_column().unwrap().unwrap();
+            col.typed::<Int96Type>()
+                .write_batch(&[int96_val], Some(&[3]), Some(&[0]))
+                .unwrap();
+            col.close().unwrap();
+        }
+        row_group.close().unwrap();
+        writer.close().unwrap();
+
+        let iceberg_schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::optional(
+                        1,
+                        "ts_map",
+                        Type::Map(crate::spec::MapType {
+                            key_field: NestedField::required(
+                                2,
+                                "key",
+                                Type::Primitive(PrimitiveType::String),
+                            )
+                            .into(),
+                            value_field: NestedField::optional(
+                                3,
+                                "value",
+                                Type::Primitive(PrimitiveType::Timestamp),
+                            )
+                            .into(),
+                        }),
+                    )
+                    .into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let batches = read_int96_batches(&file_path, iceberg_schema, vec![1]).await;
+
+        assert_eq!(batches.len(), 1);
+        let map_array = batches[0]
+            .column(0)
+            .as_any()
+            .downcast_ref::<arrow_array::MapArray>()
+            .expect("Expected MapArray");
+        let ts_array = map_array
+            .values()
+            .as_any()
+            .downcast_ref::<TimestampMicrosecondArray>()
+            .expect("Expected TimestampMicrosecondArray as map values");
+
+        assert_eq!(
+            ts_array.value(0),
+            expected_micros,
+            "INT96 in map: got {}, expected {expected_micros}",
+            ts_array.value(0)
+        );
+    }
+}
diff --git a/crates/iceberg/src/arrow/mod.rs b/crates/iceberg/src/arrow/mod.rs
index c091c45177..7823320452 100644
--- a/crates/iceberg/src/arrow/mod.rs
+++ b/crates/iceberg/src/arrow/mod.rs
@@ -27,6 +27,7 @@ pub(crate) mod caching_delete_file_loader;
 pub mod delete_file_loader;
 pub(crate) mod delete_filter;
 
+mod int96;
 mod reader;
 /// RecordBatch projection utilities
 pub mod record_batch_projector;
diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs
index 167ffd96c5..8eba4ec6c9 100644
--- a/crates/iceberg/src/arrow/reader.rs
+++ b/crates/iceberg/src/arrow/reader.rs
@@ -27,8 +27,7 @@ use arrow_array::{Array, ArrayRef, BooleanArray, Datum as ArrowDatum, RecordBatc
 use arrow_cast::cast::cast;
 use arrow_ord::cmp::{eq, gt, gt_eq, lt, lt_eq, neq};
 use arrow_schema::{
-    ArrowError, DataType, Field, FieldRef, Fields, Schema as ArrowSchema,
-    SchemaRef as ArrowSchemaRef, TimeUnit,
+    ArrowError, DataType, FieldRef, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef,
 };
 use arrow_string::like::starts_with;
 use bytes::Bytes;
@@ -47,8 +46,8 @@ use parquet::schema::types::{SchemaDescriptor, Type as ParquetType};
 use typed_builder::TypedBuilder;
 
 use crate::arrow::caching_delete_file_loader::CachingDeleteFileLoader;
+use crate::arrow::int96::coerce_int96_timestamps;
 use crate::arrow::record_batch_transformer::RecordBatchTransformerBuilder;
-use crate::arrow::schema::{ArrowSchemaVisitor, DEFAULT_MAP_FIELD_NAME, visit_schema};
 use crate::arrow::{arrow_schema_to_schema, get_arrow_datum};
 use crate::delete_vector::DeleteVector;
 use crate::error::Result;
@@ -1255,203 +1254,6 @@ fn add_fallback_field_ids_to_arrow_schema(arrow_schema: &ArrowSchemaRef) -> Arc<
     ))
 }
 
-/// Coerce Arrow schema types for INT96 columns to match the Iceberg table schema.
-///
-/// arrow-rs defaults INT96 to `Timestamp(Nanosecond)`, which overflows i64 for dates outside
-/// ~1677-2262. We use arrow-rs's schema hint mechanism to read INT96 at the resolution
-/// specified by the Iceberg schema (`timestamp` → microsecond, `timestamp_ns` → nanosecond).
-///
-/// Iceberg Java handles this differently: it bypasses parquet-mr with a custom column reader
-/// (`GenericParquetReaders.TimestampInt96Reader`). We achieve the same result via schema hints.
-///
-/// References:
-/// - Iceberg spec primitive types: <https://iceberg.apache.org/spec/#primitive-types>
-/// - arrow-rs schema hint support: <https://github.com/apache/arrow-rs/pull/7285>
-fn coerce_int96_timestamps(
-    arrow_schema: &ArrowSchemaRef,
-    iceberg_schema: &Schema,
-) -> Option<Arc<ArrowSchema>> {
-    let mut visitor = Int96CoercionVisitor::new(iceberg_schema);
-    let coerced = visit_schema(arrow_schema, &mut visitor).ok()?;
-    if visitor.changed {
-        Some(Arc::new(coerced))
-    } else {
-        None
-    }
-}
-
-/// Visitor that coerces `Timestamp(Nanosecond)` Arrow fields to the resolution
-/// indicated by the Iceberg schema. Follows the same pattern as `MetadataStripVisitor`.
-struct Int96CoercionVisitor<'a> {
-    iceberg_schema: &'a Schema,
-    field_stack: Vec<Field>,
-    changed: bool,
-}
-
-impl<'a> Int96CoercionVisitor<'a> {
-    fn new(iceberg_schema: &'a Schema) -> Self {
-        Self {
-            iceberg_schema,
-            field_stack: Vec::new(),
-            changed: false,
-        }
-    }
-
-    /// Determine the target TimeUnit for a Timestamp(Nanosecond) field based on the
-    /// Iceberg schema. Falls back to microsecond when field IDs are unavailable,
-    /// matching Iceberg Java behavior.
-    fn target_unit(&self, field: &Field) -> Option<TimeUnit> {
-        if !matches!(
-            field.data_type(),
-            DataType::Timestamp(TimeUnit::Nanosecond, _)
-        ) {
-            return None;
-        }
-
-        let target = field
-            .metadata()
-            .get(PARQUET_FIELD_ID_META_KEY)
-            .and_then(|id_str| id_str.parse::<i32>().ok())
-            .and_then(|field_id| self.iceberg_schema.field_by_id(field_id))
-            .and_then(|f| match &*f.field_type {
-                Type::Primitive(PrimitiveType::Timestamp | PrimitiveType::Timestamptz) => {
-                    Some(TimeUnit::Microsecond)
-                }
-                Type::Primitive(PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs) => {
-                    Some(TimeUnit::Nanosecond)
-                }
-                _ => None,
-            })
-            // Iceberg Java reads INT96 as microseconds by default
-            .unwrap_or(TimeUnit::Microsecond);
-
-        if target == TimeUnit::Nanosecond {
-            None
-        } else {
-            Some(target)
-        }
-    }
-}
-
-impl ArrowSchemaVisitor for Int96CoercionVisitor<'_> {
-    type T = Field;
-    type U = ArrowSchema;
-
-    fn before_field(&mut self, field: &Field) -> Result<()> {
-        self.field_stack.push(field.as_ref().clone());
-        Ok(())
-    }
-
-    fn before_list_element(&mut self, field: &Field) -> Result<()> {
-        self.field_stack.push(field.as_ref().clone());
-        Ok(())
-    }
-
-    fn before_map_key(&mut self, field: &Field) -> Result<()> {
-        self.field_stack.push(field.as_ref().clone());
-        Ok(())
-    }
-
-    fn before_map_value(&mut self, field: &Field) -> Result<()> {
-        self.field_stack.push(field.as_ref().clone());
-        Ok(())
-    }
-
-    fn schema(&mut self, schema: &ArrowSchema, values: Vec<Self::T>) -> Result<Self::U> {
-        Ok(ArrowSchema::new_with_metadata(
-            values,
-            schema.metadata().clone(),
-        ))
-    }
-
-    fn r#struct(&mut self, _fields: &Fields, results: Vec<Self::T>) -> Result<Self::T> {
-        let field_info = self
-            .field_stack
-            .pop()
-            .ok_or_else(|| Error::new(ErrorKind::Unexpected, "Field stack underflow in struct"))?;
-        Ok(Field::new(
-            field_info.name(),
-            DataType::Struct(Fields::from(results)),
-            field_info.is_nullable(),
-        )
-        .with_metadata(field_info.metadata().clone()))
-    }
-
-    fn list(&mut self, list: &DataType, value: Self::T) -> Result<Self::T> {
-        let field_info = self
-            .field_stack
-            .pop()
-            .ok_or_else(|| Error::new(ErrorKind::Unexpected, "Field stack underflow in list"))?;
-        let list_type = match list {
-            DataType::List(_) => DataType::List(Arc::new(value)),
-            DataType::LargeList(_) => DataType::LargeList(Arc::new(value)),
-            DataType::FixedSizeList(_, size) => DataType::FixedSizeList(Arc::new(value), *size),
-            _ => {
-                return Err(Error::new(
-                    ErrorKind::Unexpected,
-                    format!("Expected list type, got {list}"),
-                ));
-            }
-        };
-        Ok(
-            Field::new(field_info.name(), list_type, field_info.is_nullable())
-                .with_metadata(field_info.metadata().clone()),
-        )
-    }
-
-    fn map(&mut self, map: &DataType, key_value: Self::T, value: Self::T) -> Result<Self::T> {
-        let field_info = self
-            .field_stack
-            .pop()
-            .ok_or_else(|| Error::new(ErrorKind::Unexpected, "Field stack underflow in map"))?;
-        let sorted = match map {
-            DataType::Map(_, sorted) => *sorted,
-            _ => {
-                return Err(Error::new(
-                    ErrorKind::Unexpected,
-                    format!("Expected map type, got {map}"),
-                ));
-            }
-        };
-        let struct_field = Field::new(
-            DEFAULT_MAP_FIELD_NAME,
-            DataType::Struct(Fields::from(vec![key_value, value])),
-            false,
-        );
-        Ok(Field::new(
-            field_info.name(),
-            DataType::Map(Arc::new(struct_field), sorted),
-            field_info.is_nullable(),
-        )
-        .with_metadata(field_info.metadata().clone()))
-    }
-
-    fn primitive(&mut self, p: &DataType) -> Result<Self::T> {
-        let field_info = self.field_stack.pop().ok_or_else(|| {
-            Error::new(ErrorKind::Unexpected, "Field stack underflow in primitive")
-        })?;
-
-        if let Some(target_unit) = self.target_unit(&field_info) {
-            let tz = match field_info.data_type() {
-                DataType::Timestamp(_, tz) => tz.clone(),
-                _ => None,
-            };
-            self.changed = true;
-            Ok(Field::new(
-                field_info.name(),
-                DataType::Timestamp(target_unit, tz),
-                field_info.is_nullable(),
-            )
-            .with_metadata(field_info.metadata().clone()))
-        } else {
-            Ok(
-                Field::new(field_info.name(), p.clone(), field_info.is_nullable())
-                    .with_metadata(field_info.metadata().clone()),
-            )
-        }
-    }
-}
-
 /// A visitor to collect field ids from bound predicates.
 struct CollectFieldIdVisitor {
     field_ids: HashSet<i32>,
@@ -2178,20 +1980,16 @@ mod tests {
     use std::sync::Arc;
 
     use arrow_array::cast::AsArray;
-    use arrow_array::{
-        ArrayRef, LargeStringArray, RecordBatch, StringArray, TimestampMicrosecondArray,
-    };
+    use arrow_array::{ArrayRef, LargeStringArray, RecordBatch, StringArray};
     use arrow_schema::{DataType, Field, Schema as ArrowSchema, TimeUnit};
     use futures::TryStreamExt;
     use parquet::arrow::arrow_reader::{RowSelection, RowSelector};
     use parquet::arrow::{ArrowWriter, ProjectionMask};
-    use parquet::basic::{Compression, Repetition, Type as PhysicalType};
-    use parquet::data_type::{ByteArrayType, Int32Type, Int96, Int96Type};
+    use parquet::basic::Compression;
     use parquet::file::metadata::{ColumnChunkMetaData, RowGroupMetaData};
     use parquet::file::properties::WriterProperties;
-    use parquet::file::writer::SerializedFileWriter;
     use parquet::schema::parser::parse_message_type;
-    use parquet::schema::types::{SchemaDescPtr, SchemaDescriptor, Type as SchemaType};
+    use parquet::schema::types::{SchemaDescPtr, SchemaDescriptor};
     use roaring::RoaringTreemap;
     use tempfile::TempDir;
 
@@ -4891,541 +4689,4 @@ message schema {
         assert_eq!(result[1], expected_1);
         assert_eq!(result[2], expected_2);
     }
-
-    // INT96 encoding: [nanos_low_u32, nanos_high_u32, julian_day_u32]
-    // Julian day 2_440_588 = Unix epoch (1970-01-01)
-    const UNIX_EPOCH_JULIAN: i64 = 2_440_588;
-    const MICROS_PER_DAY: i64 = 86_400_000_000;
-    /// Noon on 3333-01-01 (Julian day 2_953_529) — outside the i64 nanosecond range (~1677-2262).
-    const INT96_TEST_NANOS_WITHIN_DAY: u64 = 43_200_000_000_000;
-    const INT96_TEST_JULIAN_DAY: u32 = 2_953_529;
-
-    /// Build an INT96 value and its expected microsecond interpretation.
-    fn make_int96_test_value() -> (Int96, i64) {
-        let mut val = Int96::new();
-        val.set_data(
-            (INT96_TEST_NANOS_WITHIN_DAY & 0xFFFFFFFF) as u32,
-            (INT96_TEST_NANOS_WITHIN_DAY >> 32) as u32,
-            INT96_TEST_JULIAN_DAY,
-        );
-        let expected_micros = (INT96_TEST_JULIAN_DAY as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY
-            + (INT96_TEST_NANOS_WITHIN_DAY / 1_000) as i64;
-        (val, expected_micros)
-    }
-
-    /// Read a Parquet file through ArrowReader and return the resulting batches.
-    async fn read_int96_batches(
-        file_path: &str,
-        schema: SchemaRef,
-        project_field_ids: Vec<i32>,
-    ) -> Vec<RecordBatch> {
-        let file_io = FileIO::new_with_fs();
-        let reader = ArrowReaderBuilder::new(file_io).build();
-
-        let file_size = std::fs::metadata(file_path).unwrap().len();
-        let task = FileScanTask {
-            file_size_in_bytes: file_size,
-            start: 0,
-            length: file_size,
-            record_count: None,
-            data_file_path: file_path.to_string(),
-            data_file_format: DataFileFormat::Parquet,
-            schema,
-            project_field_ids,
-            predicate: None,
-            deletes: vec![],
-            partition: None,
-            partition_spec: None,
-            name_mapping: None,
-            case_sensitive: false,
-        };
-
-        let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream;
-        reader.read(tasks).unwrap().try_collect().await.unwrap()
-    }
-
-    /// Writes a Parquet file with INT96 timestamps using SerializedFileWriter
-    /// (ArrowWriter cannot write INT96). Returns (file_path, expected_microsecond_values).
-    fn write_int96_parquet_file(
-        table_location: &str,
-        filename: &str,
-        with_field_ids: bool,
-    ) -> (String, Vec<i64>) {
-        let file_path = format!("{table_location}/{filename}");
-
-        let mut ts_builder = SchemaType::primitive_type_builder("ts", PhysicalType::INT96)
-            .with_repetition(Repetition::OPTIONAL);
-        let mut id_builder = SchemaType::primitive_type_builder("id", PhysicalType::INT32)
-            .with_repetition(Repetition::REQUIRED);
-
-        if with_field_ids {
-            ts_builder = ts_builder.with_id(Some(1));
-            id_builder = id_builder.with_id(Some(2));
-        }
-
-        let schema = SchemaType::group_type_builder("schema")
-            .with_fields(vec![
-                Arc::new(ts_builder.build().unwrap()),
-                Arc::new(id_builder.build().unwrap()),
-            ])
-            .build()
-            .unwrap();
-
-        // Dates outside the i64 nanosecond range (~1677-2262) overflow without coercion.
-        const NOON_NANOS: u64 = INT96_TEST_NANOS_WITHIN_DAY;
-        const JULIAN_3333: u32 = INT96_TEST_JULIAN_DAY;
-        const JULIAN_2100: u32 = 2_488_070;
-
-        let test_data: Vec<(u32, u32, u32, i64)> = vec![
-            // 3333-01-01 00:00:00
-            (
-                0,
-                0,
-                JULIAN_3333,
-                (JULIAN_3333 as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY,
-            ),
-            // 3333-01-01 12:00:00
-            (
-                (NOON_NANOS & 0xFFFFFFFF) as u32,
-                (NOON_NANOS >> 32) as u32,
-                JULIAN_3333,
-                (JULIAN_3333 as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY
-                    + (NOON_NANOS / 1_000) as i64,
-            ),
-            // 2100-01-01 00:00:00
-            (
-                0,
-                0,
-                JULIAN_2100,
-                (JULIAN_2100 as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY,
-            ),
-        ];
-
-        let int96_values: Vec<Int96> = test_data
-            .iter()
-            .map(|(lo, hi, day, _)| {
-                let mut v = Int96::new();
-                v.set_data(*lo, *hi, *day);
-                v
-            })
-            .collect();
-
-        let id_values: Vec<i32> = (0..test_data.len() as i32).collect();
-        let expected_micros: Vec<i64> = test_data.iter().map(|(_, _, _, m)| *m).collect();
-
-        let file = File::create(&file_path).unwrap();
-        let mut writer =
-            SerializedFileWriter::new(file, Arc::new(schema), Default::default()).unwrap();
-
-        let mut row_group = writer.next_row_group().unwrap();
-        {
-            // def=1: ts is OPTIONAL and present. No repetition levels (top-level columns).
-            let mut col = row_group.next_column().unwrap().unwrap();
-            col.typed::<Int96Type>()
-                .write_batch(&int96_values, Some(&vec![1; test_data.len()]), None)
-                .unwrap();
-            col.close().unwrap();
-        }
-        {
-            let mut col = row_group.next_column().unwrap().unwrap();
-            col.typed::<Int32Type>()
-                .write_batch(&id_values, None, None)
-                .unwrap();
-            col.close().unwrap();
-        }
-        row_group.close().unwrap();
-        writer.close().unwrap();
-
-        (file_path, expected_micros)
-    }
-
-    /// Read INT96 Parquet file through ArrowReader and verify top-level microsecond timestamps.
-    async fn assert_int96_read_matches(
-        file_path: &str,
-        schema: SchemaRef,
-        project_field_ids: Vec<i32>,
-        expected_micros: &[i64],
-    ) {
-        let batches = read_int96_batches(file_path, schema, project_field_ids).await;
-
-        assert_eq!(batches.len(), 1);
-        let ts_array = batches[0]
-            .column(0)
-            .as_any()
-            .downcast_ref::<TimestampMicrosecondArray>()
-            .expect("Expected TimestampMicrosecondArray");
-
-        for (i, expected) in expected_micros.iter().enumerate() {
-            assert_eq!(
-                ts_array.value(i),
-                *expected,
-                "Row {i}: got {}, expected {expected}",
-                ts_array.value(i)
-            );
-        }
-    }
-
-    /// Files with embedded field IDs (branch 1 of schema resolution).
-    #[tokio::test]
-    async fn test_read_int96_timestamps_with_field_ids() {
-        let schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::Timestamp))
-                        .into(),
-                    NestedField::required(2, "id", Type::Primitive(PrimitiveType::Int)).into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-        let (file_path, expected_micros) =
-            write_int96_parquet_file(&table_location, "with_ids.parquet", true);
-
-        assert_int96_read_matches(&file_path, schema, vec![1, 2], &expected_micros).await;
-    }
-
-    /// Migrated files without field IDs (branches 2/3 of schema resolution).
-    #[tokio::test]
-    async fn test_read_int96_timestamps_without_field_ids() {
-        let schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::Timestamp))
-                        .into(),
-                    NestedField::required(2, "id", Type::Primitive(PrimitiveType::Int)).into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-        let (file_path, expected_micros) =
-            write_int96_parquet_file(&table_location, "no_ids.parquet", false);
-
-        assert_int96_read_matches(&file_path, schema, vec![1, 2], &expected_micros).await;
-    }
-
-    /// Test reading INT96 timestamps inside a struct field.
-    #[tokio::test]
-    async fn test_read_int96_timestamps_in_struct() {
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-        let file_path = format!("{table_location}/struct_int96.parquet");
-
-        let ts_type = SchemaType::primitive_type_builder("ts", PhysicalType::INT96)
-            .with_repetition(Repetition::OPTIONAL)
-            .with_id(Some(2))
-            .build()
-            .unwrap();
-
-        let struct_type = SchemaType::group_type_builder("data")
-            .with_repetition(Repetition::REQUIRED)
-            .with_id(Some(1))
-            .with_fields(vec![Arc::new(ts_type)])
-            .build()
-            .unwrap();
-
-        let parquet_schema = SchemaType::group_type_builder("schema")
-            .with_fields(vec![Arc::new(struct_type)])
-            .build()
-            .unwrap();
-
-        let (int96_val, expected_micros) = make_int96_test_value();
-
-        let file = File::create(&file_path).unwrap();
-        let mut writer =
-            SerializedFileWriter::new(file, Arc::new(parquet_schema), Default::default()).unwrap();
-
-        // def=1: struct is REQUIRED so no level, ts is OPTIONAL and present (1).
-        // No repetition levels needed (no repeated groups).
-        let mut row_group = writer.next_row_group().unwrap();
-        {
-            let mut col = row_group.next_column().unwrap().unwrap();
-            col.typed::<Int96Type>()
-                .write_batch(&[int96_val], Some(&[1]), None)
-                .unwrap();
-            col.close().unwrap();
-        }
-        row_group.close().unwrap();
-        writer.close().unwrap();
-
-        let iceberg_schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::required(
-                        1,
-                        "data",
-                        Type::Struct(crate::spec::StructType::new(vec![
-                            NestedField::optional(
-                                2,
-                                "ts",
-                                Type::Primitive(PrimitiveType::Timestamp),
-                            )
-                            .into(),
-                        ])),
-                    )
-                    .into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-
-        let batches = read_int96_batches(&file_path, iceberg_schema, vec![1]).await;
-
-        assert_eq!(batches.len(), 1);
-        let struct_array = batches[0]
-            .column(0)
-            .as_any()
-            .downcast_ref::<arrow_array::StructArray>()
-            .expect("Expected StructArray");
-        let ts_array = struct_array
-            .column(0)
-            .as_any()
-            .downcast_ref::<TimestampMicrosecondArray>()
-            .expect("Expected TimestampMicrosecondArray inside struct");
-
-        assert_eq!(
-            ts_array.value(0),
-            expected_micros,
-            "INT96 in struct: got {}, expected {expected_micros}",
-            ts_array.value(0)
-        );
-    }
-
-    /// Test reading INT96 timestamps inside a list field (3-level Parquet LIST encoding).
-    #[tokio::test]
-    async fn test_read_int96_timestamps_in_list() {
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-        let file_path = format!("{table_location}/list_int96.parquet");
-
-        // 3-level LIST encoding:
-        //   optional group timestamps (LIST) {
-        //     repeated group list {
-        //       optional int96 element;
-        //     }
-        //   }
-        let element_type = SchemaType::primitive_type_builder("element", PhysicalType::INT96)
-            .with_repetition(Repetition::OPTIONAL)
-            .with_id(Some(2))
-            .build()
-            .unwrap();
-
-        let list_group = SchemaType::group_type_builder("list")
-            .with_repetition(Repetition::REPEATED)
-            .with_fields(vec![Arc::new(element_type)])
-            .build()
-            .unwrap();
-
-        let list_type = SchemaType::group_type_builder("timestamps")
-            .with_repetition(Repetition::OPTIONAL)
-            .with_id(Some(1))
-            .with_logical_type(Some(parquet::basic::LogicalType::List))
-            .with_fields(vec![Arc::new(list_group)])
-            .build()
-            .unwrap();
-
-        let parquet_schema = SchemaType::group_type_builder("schema")
-            .with_fields(vec![Arc::new(list_type)])
-            .build()
-            .unwrap();
-
-        let (int96_val, expected_micros) = make_int96_test_value();
-
-        let file = File::create(&file_path).unwrap();
-        let mut writer =
-            SerializedFileWriter::new(file, Arc::new(parquet_schema), Default::default()).unwrap();
-
-        // Write a single row with a list containing one INT96 element.
-        // def=3: list present (1) + repeated group (2) + element present (3)
-        // rep=0: start of a new list
-        let mut row_group = writer.next_row_group().unwrap();
-        {
-            let mut col = row_group.next_column().unwrap().unwrap();
-            col.typed::<Int96Type>()
-                .write_batch(&[int96_val], Some(&[3]), Some(&[0]))
-                .unwrap();
-            col.close().unwrap();
-        }
-        row_group.close().unwrap();
-        writer.close().unwrap();
-
-        let iceberg_schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::optional(
-                        1,
-                        "timestamps",
-                        Type::List(crate::spec::ListType {
-                            element_field: NestedField::optional(
-                                2,
-                                "element",
-                                Type::Primitive(PrimitiveType::Timestamp),
-                            )
-                            .into(),
-                        }),
-                    )
-                    .into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-
-        let batches = read_int96_batches(&file_path, iceberg_schema, vec![1]).await;
-
-        assert_eq!(batches.len(), 1);
-        let list_array = batches[0]
-            .column(0)
-            .as_any()
-            .downcast_ref::<arrow_array::ListArray>()
-            .expect("Expected ListArray");
-        let ts_array = list_array
-            .values()
-            .as_any()
-            .downcast_ref::<TimestampMicrosecondArray>()
-            .expect("Expected TimestampMicrosecondArray inside list");
-
-        assert_eq!(
-            ts_array.value(0),
-            expected_micros,
-            "INT96 in list: got {}, expected {expected_micros}",
-            ts_array.value(0)
-        );
-    }
-
-    /// Test reading INT96 timestamps as map values.
-    #[tokio::test]
-    async fn test_read_int96_timestamps_in_map() {
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-        let file_path = format!("{table_location}/map_int96.parquet");
-
-        // MAP encoding:
-        //   optional group ts_map (MAP) {
-        //     repeated group key_value {
-        //       required binary key (UTF8);
-        //       optional int96 value;
-        //     }
-        //   }
-        let key_type = SchemaType::primitive_type_builder("key", PhysicalType::BYTE_ARRAY)
-            .with_repetition(Repetition::REQUIRED)
-            .with_logical_type(Some(parquet::basic::LogicalType::String))
-            .with_id(Some(2))
-            .build()
-            .unwrap();
-
-        let value_type = SchemaType::primitive_type_builder("value", PhysicalType::INT96)
-            .with_repetition(Repetition::OPTIONAL)
-            .with_id(Some(3))
-            .build()
-            .unwrap();
-
-        let key_value_group = SchemaType::group_type_builder("key_value")
-            .with_repetition(Repetition::REPEATED)
-            .with_fields(vec![Arc::new(key_type), Arc::new(value_type)])
-            .build()
-            .unwrap();
-
-        let map_type = SchemaType::group_type_builder("ts_map")
-            .with_repetition(Repetition::OPTIONAL)
-            .with_id(Some(1))
-            .with_logical_type(Some(parquet::basic::LogicalType::Map))
-            .with_fields(vec![Arc::new(key_value_group)])
-            .build()
-            .unwrap();
-
-        let parquet_schema = SchemaType::group_type_builder("schema")
-            .with_fields(vec![Arc::new(map_type)])
-            .build()
-            .unwrap();
-
-        let (int96_val, expected_micros) = make_int96_test_value();
-
-        let file = File::create(&file_path).unwrap();
-        let mut writer =
-            SerializedFileWriter::new(file, Arc::new(parquet_schema), Default::default()).unwrap();
-
-        // Write a single row with a map containing one key-value pair.
-        // rep=0 for both columns: start of a new map.
-        // key def=2: map present (1) + key_value entry present (2), key is REQUIRED.
-        // value def=3: map present (1) + key_value entry present (2) + value present (3).
-        let mut row_group = writer.next_row_group().unwrap();
-        {
-            let mut col = row_group.next_column().unwrap().unwrap();
-            col.typed::<ByteArrayType>()
-                .write_batch(
-                    &[parquet::data_type::ByteArray::from("event_time")],
-                    Some(&[2]),
-                    Some(&[0]),
-                )
-                .unwrap();
-            col.close().unwrap();
-        }
-        {
-            let mut col = row_group.next_column().unwrap().unwrap();
-            col.typed::<Int96Type>()
-                .write_batch(&[int96_val], Some(&[3]), Some(&[0]))
-                .unwrap();
-            col.close().unwrap();
-        }
-        row_group.close().unwrap();
-        writer.close().unwrap();
-
-        let iceberg_schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::optional(
-                        1,
-                        "ts_map",
-                        Type::Map(crate::spec::MapType {
-                            key_field: NestedField::required(
-                                2,
-                                "key",
-                                Type::Primitive(PrimitiveType::String),
-                            )
-                            .into(),
-                            value_field: NestedField::optional(
-                                3,
-                                "value",
-                                Type::Primitive(PrimitiveType::Timestamp),
-                            )
-                            .into(),
-                        }),
-                    )
-                    .into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-
-        let batches = read_int96_batches(&file_path, iceberg_schema, vec![1]).await;
-
-        assert_eq!(batches.len(), 1);
-        let map_array = batches[0]
-            .column(0)
-            .as_any()
-            .downcast_ref::<arrow_array::MapArray>()
-            .expect("Expected MapArray");
-        let ts_array = map_array
-            .values()
-            .as_any()
-            .downcast_ref::<TimestampMicrosecondArray>()
-            .expect("Expected TimestampMicrosecondArray as map values");
-
-        assert_eq!(
-            ts_array.value(0),
-            expected_micros,
-            "INT96 in map: got {}, expected {expected_micros}",
-            ts_array.value(0)
-        );
-    }
 }

From 38eee0339f275f5563d360cc2643364235a241d0 Mon Sep 17 00:00:00 2001
From: Matt Butrovich <mbutrovich@gmail.com>
Date: Wed, 15 Apr 2026 10:24:02 -0400
Subject: [PATCH 8/8] Address PR feedback.

---
 crates/iceberg/src/arrow/int96.rs  | 768 ++++++++++-------------------
 crates/iceberg/src/arrow/reader.rs | 553 +++++++++++++++++++++
 2 files changed, 824 insertions(+), 497 deletions(-)

diff --git a/crates/iceberg/src/arrow/int96.rs b/crates/iceberg/src/arrow/int96.rs
index 57516a63d9..63a7a30f1a 100644
--- a/crates/iceberg/src/arrow/int96.rs
+++ b/crates/iceberg/src/arrow/int96.rs
@@ -249,556 +249,330 @@ impl ArrowSchemaVisitor for Int96CoercionVisitor<'_> {
 
 #[cfg(test)]
 mod tests {
-    use std::fs::File;
+    use std::collections::HashMap;
     use std::sync::Arc;
 
-    use arrow_array::TimestampMicrosecondArray;
-    use futures::TryStreamExt;
-    use parquet::basic::{Repetition, Type as PhysicalType};
-    use parquet::data_type::{ByteArrayType, Int32Type, Int96, Int96Type};
-    use parquet::file::writer::SerializedFileWriter;
-    use parquet::schema::types::Type as SchemaType;
-    use tempfile::TempDir;
-
-    use crate::arrow::ArrowReaderBuilder;
-    use crate::io::FileIO;
-    use crate::scan::{FileScanTask, FileScanTaskStream};
-    use crate::spec::{DataFileFormat, NestedField, PrimitiveType, Schema, SchemaRef, Type};
-
-    // INT96 encoding: [nanos_low_u32, nanos_high_u32, julian_day_u32]
-    // Julian day 2_440_588 = Unix epoch (1970-01-01)
-    const UNIX_EPOCH_JULIAN: i64 = 2_440_588;
-    const MICROS_PER_DAY: i64 = 86_400_000_000;
-    /// Noon on 3333-01-01 (Julian day 2_953_529) — outside the i64 nanosecond range (~1677-2262).
-    const INT96_TEST_NANOS_WITHIN_DAY: u64 = 43_200_000_000_000;
-    const INT96_TEST_JULIAN_DAY: u32 = 2_953_529;
-
-    /// Build an INT96 value and its expected microsecond interpretation.
-    fn make_int96_test_value() -> (Int96, i64) {
-        let mut val = Int96::new();
-        val.set_data(
-            (INT96_TEST_NANOS_WITHIN_DAY & 0xFFFFFFFF) as u32,
-            (INT96_TEST_NANOS_WITHIN_DAY >> 32) as u32,
-            INT96_TEST_JULIAN_DAY,
-        );
-        let expected_micros = (INT96_TEST_JULIAN_DAY as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY
-            + (INT96_TEST_NANOS_WITHIN_DAY / 1_000) as i64;
-        (val, expected_micros)
-    }
-
-    /// Read a Parquet file through ArrowReader and return the resulting batches.
-    async fn read_int96_batches(
-        file_path: &str,
-        schema: SchemaRef,
-        project_field_ids: Vec<i32>,
-    ) -> Vec<arrow_array::RecordBatch> {
-        let file_io = FileIO::new_with_fs();
-        let reader = ArrowReaderBuilder::new(file_io).build();
-
-        let file_size = std::fs::metadata(file_path).unwrap().len();
-        let task = FileScanTask {
-            file_size_in_bytes: file_size,
-            start: 0,
-            length: file_size,
-            record_count: None,
-            data_file_path: file_path.to_string(),
-            data_file_format: DataFileFormat::Parquet,
-            schema,
-            project_field_ids,
-            predicate: None,
-            deletes: vec![],
-            partition: None,
-            partition_spec: None,
-            name_mapping: None,
-            case_sensitive: false,
-        };
+    use arrow_schema::{DataType, Field, Schema as ArrowSchema, TimeUnit};
+    use parquet::arrow::PARQUET_FIELD_ID_META_KEY;
 
-        let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream;
-        reader.read(tasks).unwrap().try_collect().await.unwrap()
-    }
-
-    /// Writes a Parquet file with INT96 timestamps using SerializedFileWriter
-    /// (ArrowWriter cannot write INT96). Returns (file_path, expected_microsecond_values).
-    fn write_int96_parquet_file(
-        table_location: &str,
-        filename: &str,
-        with_field_ids: bool,
-    ) -> (String, Vec<i64>) {
-        let file_path = format!("{table_location}/{filename}");
-
-        let mut ts_builder = SchemaType::primitive_type_builder("ts", PhysicalType::INT96)
-            .with_repetition(Repetition::OPTIONAL);
-        let mut id_builder = SchemaType::primitive_type_builder("id", PhysicalType::INT32)
-            .with_repetition(Repetition::REQUIRED);
-
-        if with_field_ids {
-            ts_builder = ts_builder.with_id(Some(1));
-            id_builder = id_builder.with_id(Some(2));
-        }
+    use super::coerce_int96_timestamps;
+    use crate::spec::{ListType, MapType, NestedField, PrimitiveType, Schema, StructType, Type};
 
-        let schema = SchemaType::group_type_builder("schema")
+    fn iceberg_schema_with_timestamp() -> Schema {
+        Schema::builder()
+            .with_schema_id(1)
             .with_fields(vec![
-                Arc::new(ts_builder.build().unwrap()),
-                Arc::new(id_builder.build().unwrap()),
+                NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::Timestamp)).into(),
+                NestedField::required(2, "id", Type::Primitive(PrimitiveType::Int)).into(),
             ])
             .build()
-            .unwrap();
-
-        // Dates outside the i64 nanosecond range (~1677-2262) overflow without coercion.
-        const NOON_NANOS: u64 = INT96_TEST_NANOS_WITHIN_DAY;
-        const JULIAN_3333: u32 = INT96_TEST_JULIAN_DAY;
-        const JULIAN_2100: u32 = 2_488_070;
-
-        let test_data: Vec<(u32, u32, u32, i64)> = vec![
-            // 3333-01-01 00:00:00
-            (
-                0,
-                0,
-                JULIAN_3333,
-                (JULIAN_3333 as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY,
-            ),
-            // 3333-01-01 12:00:00
-            (
-                (NOON_NANOS & 0xFFFFFFFF) as u32,
-                (NOON_NANOS >> 32) as u32,
-                JULIAN_3333,
-                (JULIAN_3333 as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY
-                    + (NOON_NANOS / 1_000) as i64,
-            ),
-            // 2100-01-01 00:00:00
-            (
-                0,
-                0,
-                JULIAN_2100,
-                (JULIAN_2100 as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY,
-            ),
-        ];
-
-        let int96_values: Vec<Int96> = test_data
-            .iter()
-            .map(|(lo, hi, day, _)| {
-                let mut v = Int96::new();
-                v.set_data(*lo, *hi, *day);
-                v
-            })
-            .collect();
-
-        let id_values: Vec<i32> = (0..test_data.len() as i32).collect();
-        let expected_micros: Vec<i64> = test_data.iter().map(|(_, _, _, m)| *m).collect();
-
-        let file = File::create(&file_path).unwrap();
-        let mut writer =
-            SerializedFileWriter::new(file, Arc::new(schema), Default::default()).unwrap();
-
-        let mut row_group = writer.next_row_group().unwrap();
-        {
-            // def=1: ts is OPTIONAL and present. No repetition levels (top-level columns).
-            let mut col = row_group.next_column().unwrap().unwrap();
-            col.typed::<Int96Type>()
-                .write_batch(&int96_values, Some(&vec![1; test_data.len()]), None)
-                .unwrap();
-            col.close().unwrap();
-        }
-        {
-            let mut col = row_group.next_column().unwrap().unwrap();
-            col.typed::<Int32Type>()
-                .write_batch(&id_values, None, None)
-                .unwrap();
-            col.close().unwrap();
-        }
-        row_group.close().unwrap();
-        writer.close().unwrap();
-
-        (file_path, expected_micros)
+            .unwrap()
     }
 
-    /// Read INT96 Parquet file through ArrowReader and verify top-level microsecond timestamps.
-    async fn assert_int96_read_matches(
-        file_path: &str,
-        schema: SchemaRef,
-        project_field_ids: Vec<i32>,
-        expected_micros: &[i64],
-    ) {
-        let batches = read_int96_batches(file_path, schema, project_field_ids).await;
-
-        assert_eq!(batches.len(), 1);
-        let ts_array = batches[0]
-            .column(0)
-            .as_any()
-            .downcast_ref::<TimestampMicrosecondArray>()
-            .expect("Expected TimestampMicrosecondArray");
-
-        for (i, expected) in expected_micros.iter().enumerate() {
-            assert_eq!(
-                ts_array.value(i),
-                *expected,
-                "Row {i}: got {}, expected {expected}",
-                ts_array.value(i)
-            );
-        }
+    fn field_id_meta(id: i32) -> HashMap<String, String> {
+        HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), id.to_string())])
     }
 
-    /// Files with embedded field IDs (branch 1 of schema resolution).
-    #[tokio::test]
-    async fn test_read_int96_timestamps_with_field_ids() {
-        let schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::Timestamp))
-                        .into(),
-                    NestedField::required(2, "id", Type::Primitive(PrimitiveType::Int)).into(),
-                ])
-                .build()
-                .unwrap(),
-        );
+    #[test]
+    fn test_coerce_timestamp_ns_to_us() {
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("ts", DataType::Timestamp(TimeUnit::Nanosecond, None), true)
+                .with_metadata(field_id_meta(1)),
+            Field::new("id", DataType::Int32, false).with_metadata(field_id_meta(2)),
+        ]));
+        let iceberg = iceberg_schema_with_timestamp();
 
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-        let (file_path, expected_micros) =
-            write_int96_parquet_file(&table_location, "with_ids.parquet", true);
-
-        assert_int96_read_matches(&file_path, schema, vec![1, 2], &expected_micros).await;
+        let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap();
+        assert_eq!(
+            coerced.field(0).data_type(),
+            &DataType::Timestamp(TimeUnit::Microsecond, None)
+        );
+        // Non-timestamp field unchanged
+        assert_eq!(coerced.field(1).data_type(), &DataType::Int32);
     }
 
-    /// Migrated files without field IDs (branches 2/3 of schema resolution).
-    #[tokio::test]
-    async fn test_read_int96_timestamps_without_field_ids() {
-        let schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::Timestamp))
-                        .into(),
-                    NestedField::required(2, "id", Type::Primitive(PrimitiveType::Int)).into(),
-                ])
-                .build()
-                .unwrap(),
-        );
+    #[test]
+    fn test_coerce_timestamptz_ns_to_us() {
+        let iceberg = Schema::builder()
+            .with_schema_id(1)
+            .with_fields(vec![
+                NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::Timestamptz)).into(),
+            ])
+            .build()
+            .unwrap();
 
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-        let (file_path, expected_micros) =
-            write_int96_parquet_file(&table_location, "no_ids.parquet", false);
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new(
+                "ts",
+                DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())),
+                true,
+            )
+            .with_metadata(field_id_meta(1)),
+        ]));
 
-        assert_int96_read_matches(&file_path, schema, vec![1, 2], &expected_micros).await;
+        let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap();
+        assert_eq!(
+            coerced.field(0).data_type(),
+            &DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".into()))
+        );
     }
 
-    /// Test reading INT96 timestamps inside a struct field.
-    #[tokio::test]
-    async fn test_read_int96_timestamps_in_struct() {
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-        let file_path = format!("{table_location}/struct_int96.parquet");
-
-        let ts_type = SchemaType::primitive_type_builder("ts", PhysicalType::INT96)
-            .with_repetition(Repetition::OPTIONAL)
-            .with_id(Some(2))
+    #[test]
+    fn test_no_coercion_when_iceberg_is_timestamp_ns() {
+        let iceberg = Schema::builder()
+            .with_schema_id(1)
+            .with_fields(vec![
+                NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::TimestampNs)).into(),
+            ])
             .build()
             .unwrap();
 
-        let struct_type = SchemaType::group_type_builder("data")
-            .with_repetition(Repetition::REQUIRED)
-            .with_id(Some(1))
-            .with_fields(vec![Arc::new(ts_type)])
-            .build()
-            .unwrap();
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("ts", DataType::Timestamp(TimeUnit::Nanosecond, None), true)
+                .with_metadata(field_id_meta(1)),
+        ]));
+
+        assert!(coerce_int96_timestamps(&arrow_schema, &iceberg).is_none());
+    }
 
-        let parquet_schema = SchemaType::group_type_builder("schema")
-            .with_fields(vec![Arc::new(struct_type)])
+    #[test]
+    fn test_no_coercion_when_iceberg_is_timestamptz_ns() {
+        let iceberg = Schema::builder()
+            .with_schema_id(1)
+            .with_fields(vec![
+                NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::TimestamptzNs))
+                    .into(),
+            ])
             .build()
             .unwrap();
 
-        let (int96_val, expected_micros) = make_int96_test_value();
-
-        let file = File::create(&file_path).unwrap();
-        let mut writer =
-            SerializedFileWriter::new(file, Arc::new(parquet_schema), Default::default()).unwrap();
-
-        // def=1: struct is REQUIRED so no level, ts is OPTIONAL and present (1).
-        // No repetition levels needed (no repeated groups).
-        let mut row_group = writer.next_row_group().unwrap();
-        {
-            let mut col = row_group.next_column().unwrap().unwrap();
-            col.typed::<Int96Type>()
-                .write_batch(&[int96_val], Some(&[1]), None)
-                .unwrap();
-            col.close().unwrap();
-        }
-        row_group.close().unwrap();
-        writer.close().unwrap();
-
-        let iceberg_schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::required(
-                        1,
-                        "data",
-                        Type::Struct(crate::spec::StructType::new(vec![
-                            NestedField::optional(
-                                2,
-                                "ts",
-                                Type::Primitive(PrimitiveType::Timestamp),
-                            )
-                            .into(),
-                        ])),
-                    )
-                    .into(),
-                ])
-                .build()
-                .unwrap(),
-        );
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new(
+                "ts",
+                DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())),
+                true,
+            )
+            .with_metadata(field_id_meta(1)),
+        ]));
 
-        let batches = read_int96_batches(&file_path, iceberg_schema, vec![1]).await;
+        assert!(coerce_int96_timestamps(&arrow_schema, &iceberg).is_none());
+    }
 
-        assert_eq!(batches.len(), 1);
-        let struct_array = batches[0]
-            .column(0)
-            .as_any()
-            .downcast_ref::<arrow_array::StructArray>()
-            .expect("Expected StructArray");
-        let ts_array = struct_array
-            .column(0)
-            .as_any()
-            .downcast_ref::<TimestampMicrosecondArray>()
-            .expect("Expected TimestampMicrosecondArray inside struct");
+    #[test]
+    fn test_no_coercion_when_already_microsecond() {
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("ts", DataType::Timestamp(TimeUnit::Microsecond, None), true)
+                .with_metadata(field_id_meta(1)),
+            Field::new("id", DataType::Int32, false).with_metadata(field_id_meta(2)),
+        ]));
+        let iceberg = iceberg_schema_with_timestamp();
 
+        assert!(coerce_int96_timestamps(&arrow_schema, &iceberg).is_none());
+    }
+
+    // Without field IDs, the visitor can't look up the Iceberg type and falls back
+    // to microsecond to match Iceberg Java behavior.
+    #[test]
+    fn test_defaults_to_us_without_field_ids() {
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new(
+            "ts",
+            DataType::Timestamp(TimeUnit::Nanosecond, None),
+            true,
+        )]));
+        let iceberg = iceberg_schema_with_timestamp();
+
+        let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap();
         assert_eq!(
-            ts_array.value(0),
-            expected_micros,
-            "INT96 in struct: got {}, expected {expected_micros}",
-            ts_array.value(0)
+            coerced.field(0).data_type(),
+            &DataType::Timestamp(TimeUnit::Microsecond, None)
         );
     }
 
-    /// Test reading INT96 timestamps inside a list field (3-level Parquet LIST encoding).
-    #[tokio::test]
-    async fn test_read_int96_timestamps_in_list() {
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-        let file_path = format!("{table_location}/list_int96.parquet");
-
-        // 3-level LIST encoding:
-        //   optional group timestamps (LIST) {
-        //     repeated group list {
-        //       optional int96 element;
-        //     }
-        //   }
-        let element_type = SchemaType::primitive_type_builder("element", PhysicalType::INT96)
-            .with_repetition(Repetition::OPTIONAL)
-            .with_id(Some(2))
-            .build()
-            .unwrap();
-
-        let list_group = SchemaType::group_type_builder("list")
-            .with_repetition(Repetition::REPEATED)
-            .with_fields(vec![Arc::new(element_type)])
+    // Field ID exists but points to a non-timestamp Iceberg type. The field_by_id
+    // lookup succeeds but the match arm returns None, so unwrap_or falls back to
+    // microsecond.
+    #[test]
+    fn test_defaults_to_us_when_iceberg_type_is_not_timestamp() {
+        let iceberg = Schema::builder()
+            .with_schema_id(1)
+            .with_fields(vec![
+                NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::String)).into(),
+            ])
             .build()
             .unwrap();
 
-        let list_type = SchemaType::group_type_builder("timestamps")
-            .with_repetition(Repetition::OPTIONAL)
-            .with_id(Some(1))
-            .with_logical_type(Some(parquet::basic::LogicalType::List))
-            .with_fields(vec![Arc::new(list_group)])
-            .build()
-            .unwrap();
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("ts", DataType::Timestamp(TimeUnit::Nanosecond, None), true)
+                .with_metadata(field_id_meta(1)),
+        ]));
 
-        let parquet_schema = SchemaType::group_type_builder("schema")
-            .with_fields(vec![Arc::new(list_type)])
-            .build()
-            .unwrap();
-
-        let (int96_val, expected_micros) = make_int96_test_value();
-
-        let file = File::create(&file_path).unwrap();
-        let mut writer =
-            SerializedFileWriter::new(file, Arc::new(parquet_schema), Default::default()).unwrap();
-
-        // Write a single row with a list containing one INT96 element.
-        // def=3: list present (1) + repeated group (2) + element present (3)
-        // rep=0: start of a new list
-        let mut row_group = writer.next_row_group().unwrap();
-        {
-            let mut col = row_group.next_column().unwrap().unwrap();
-            col.typed::<Int96Type>()
-                .write_batch(&[int96_val], Some(&[3]), Some(&[0]))
-                .unwrap();
-            col.close().unwrap();
-        }
-        row_group.close().unwrap();
-        writer.close().unwrap();
-
-        let iceberg_schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::optional(
-                        1,
-                        "timestamps",
-                        Type::List(crate::spec::ListType {
-                            element_field: NestedField::optional(
-                                2,
-                                "element",
-                                Type::Primitive(PrimitiveType::Timestamp),
-                            )
-                            .into(),
-                        }),
-                    )
-                    .into(),
-                ])
-                .build()
-                .unwrap(),
+        let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap();
+        assert_eq!(
+            coerced.field(0).data_type(),
+            &DataType::Timestamp(TimeUnit::Microsecond, None)
         );
+    }
 
-        let batches = read_int96_batches(&file_path, iceberg_schema, vec![1]).await;
+    #[test]
+    fn test_coerce_preserves_field_metadata() {
+        let mut meta = field_id_meta(1);
+        meta.insert("custom_key".to_string(), "custom_value".to_string());
 
-        assert_eq!(batches.len(), 1);
-        let list_array = batches[0]
-            .column(0)
-            .as_any()
-            .downcast_ref::<arrow_array::ListArray>()
-            .expect("Expected ListArray");
-        let ts_array = list_array
-            .values()
-            .as_any()
-            .downcast_ref::<TimestampMicrosecondArray>()
-            .expect("Expected TimestampMicrosecondArray inside list");
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("ts", DataType::Timestamp(TimeUnit::Nanosecond, None), true)
+                .with_metadata(meta.clone()),
+        ]));
+        let iceberg = iceberg_schema_with_timestamp();
 
-        assert_eq!(
-            ts_array.value(0),
-            expected_micros,
-            "INT96 in list: got {}, expected {expected_micros}",
-            ts_array.value(0)
-        );
+        let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap();
+        assert_eq!(coerced.field(0).metadata(), &meta);
     }
 
-    /// Test reading INT96 timestamps as map values.
-    #[tokio::test]
-    async fn test_read_int96_timestamps_in_map() {
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-        let file_path = format!("{table_location}/map_int96.parquet");
-
-        // MAP encoding:
-        //   optional group ts_map (MAP) {
-        //     repeated group key_value {
-        //       required binary key (UTF8);
-        //       optional int96 value;
-        //     }
-        //   }
-        let key_type = SchemaType::primitive_type_builder("key", PhysicalType::BYTE_ARRAY)
-            .with_repetition(Repetition::REQUIRED)
-            .with_logical_type(Some(parquet::basic::LogicalType::String))
-            .with_id(Some(2))
-            .build()
-            .unwrap();
-
-        let value_type = SchemaType::primitive_type_builder("value", PhysicalType::INT96)
-            .with_repetition(Repetition::OPTIONAL)
-            .with_id(Some(3))
+    #[test]
+    fn test_coerce_timestamp_in_struct() {
+        let iceberg = Schema::builder()
+            .with_schema_id(1)
+            .with_fields(vec![
+                NestedField::required(
+                    1,
+                    "data",
+                    Type::Struct(StructType::new(vec![
+                        NestedField::optional(2, "ts", Type::Primitive(PrimitiveType::Timestamp))
+                            .into(),
+                    ])),
+                )
+                .into(),
+            ])
             .build()
             .unwrap();
 
-        let key_value_group = SchemaType::group_type_builder("key_value")
-            .with_repetition(Repetition::REPEATED)
-            .with_fields(vec![Arc::new(key_type), Arc::new(value_type)])
-            .build()
-            .unwrap();
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new(
+                "data",
+                DataType::Struct(
+                    vec![
+                        Field::new("ts", DataType::Timestamp(TimeUnit::Nanosecond, None), true)
+                            .with_metadata(field_id_meta(2)),
+                    ]
+                    .into(),
+                ),
+                false,
+            )
+            .with_metadata(field_id_meta(1)),
+        ]));
 
-        let map_type = SchemaType::group_type_builder("ts_map")
-            .with_repetition(Repetition::OPTIONAL)
-            .with_id(Some(1))
-            .with_logical_type(Some(parquet::basic::LogicalType::Map))
-            .with_fields(vec![Arc::new(key_value_group)])
-            .build()
-            .unwrap();
+        let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap();
+        let inner = match coerced.field(0).data_type() {
+            DataType::Struct(fields) => fields,
+            other => panic!("Expected Struct, got {other}"),
+        };
+        assert_eq!(
+            inner[0].data_type(),
+            &DataType::Timestamp(TimeUnit::Microsecond, None)
+        );
+    }
 
-        let parquet_schema = SchemaType::group_type_builder("schema")
-            .with_fields(vec![Arc::new(map_type)])
+    #[test]
+    fn test_coerce_timestamp_in_list() {
+        let iceberg = Schema::builder()
+            .with_schema_id(1)
+            .with_fields(vec![
+                NestedField::optional(
+                    1,
+                    "timestamps",
+                    Type::List(ListType {
+                        element_field: NestedField::optional(
+                            2,
+                            "element",
+                            Type::Primitive(PrimitiveType::Timestamp),
+                        )
+                        .into(),
+                    }),
+                )
+                .into(),
+            ])
             .build()
             .unwrap();
 
-        let (int96_val, expected_micros) = make_int96_test_value();
-
-        let file = File::create(&file_path).unwrap();
-        let mut writer =
-            SerializedFileWriter::new(file, Arc::new(parquet_schema), Default::default()).unwrap();
-
-        // Write a single row with a map containing one key-value pair.
-        // rep=0 for both columns: start of a new map.
-        // key def=2: map present (1) + key_value entry present (2), key is REQUIRED.
-        // value def=3: map present (1) + key_value entry present (2) + value present (3).
-        let mut row_group = writer.next_row_group().unwrap();
-        {
-            let mut col = row_group.next_column().unwrap().unwrap();
-            col.typed::<ByteArrayType>()
-                .write_batch(
-                    &[parquet::data_type::ByteArray::from("event_time")],
-                    Some(&[2]),
-                    Some(&[0]),
-                )
-                .unwrap();
-            col.close().unwrap();
-        }
-        {
-            let mut col = row_group.next_column().unwrap().unwrap();
-            col.typed::<Int96Type>()
-                .write_batch(&[int96_val], Some(&[3]), Some(&[0]))
-                .unwrap();
-            col.close().unwrap();
-        }
-        row_group.close().unwrap();
-        writer.close().unwrap();
-
-        let iceberg_schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::optional(
-                        1,
-                        "ts_map",
-                        Type::Map(crate::spec::MapType {
-                            key_field: NestedField::required(
-                                2,
-                                "key",
-                                Type::Primitive(PrimitiveType::String),
-                            )
-                            .into(),
-                            value_field: NestedField::optional(
-                                3,
-                                "value",
-                                Type::Primitive(PrimitiveType::Timestamp),
-                            )
-                            .into(),
-                        }),
-                    )
-                    .into(),
-                ])
-                .build()
-                .unwrap(),
+        let element_field = Field::new(
+            "element",
+            DataType::Timestamp(TimeUnit::Nanosecond, None),
+            true,
+        )
+        .with_metadata(field_id_meta(2));
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("timestamps", DataType::List(Arc::new(element_field)), true)
+                .with_metadata(field_id_meta(1)),
+        ]));
+
+        let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap();
+        let element_dt = match coerced.field(0).data_type() {
+            DataType::List(f) => f.data_type(),
+            other => panic!("Expected List, got {other}"),
+        };
+        assert_eq!(
+            element_dt,
+            &DataType::Timestamp(TimeUnit::Microsecond, None)
         );
+    }
 
-        let batches = read_int96_batches(&file_path, iceberg_schema, vec![1]).await;
-
-        assert_eq!(batches.len(), 1);
-        let map_array = batches[0]
-            .column(0)
-            .as_any()
-            .downcast_ref::<arrow_array::MapArray>()
-            .expect("Expected MapArray");
-        let ts_array = map_array
-            .values()
-            .as_any()
-            .downcast_ref::<TimestampMicrosecondArray>()
-            .expect("Expected TimestampMicrosecondArray as map values");
+    #[test]
+    fn test_coerce_timestamp_in_map_value() {
+        let iceberg = Schema::builder()
+            .with_schema_id(1)
+            .with_fields(vec![
+                NestedField::optional(
+                    1,
+                    "ts_map",
+                    Type::Map(MapType {
+                        key_field: NestedField::required(
+                            2,
+                            "key",
+                            Type::Primitive(PrimitiveType::String),
+                        )
+                        .into(),
+                        value_field: NestedField::optional(
+                            3,
+                            "value",
+                            Type::Primitive(PrimitiveType::Timestamp),
+                        )
+                        .into(),
+                    }),
+                )
+                .into(),
+            ])
+            .build()
+            .unwrap();
 
-        assert_eq!(
-            ts_array.value(0),
-            expected_micros,
-            "INT96 in map: got {}, expected {expected_micros}",
-            ts_array.value(0)
+        let key_field = Field::new("key", DataType::Utf8, false).with_metadata(field_id_meta(2));
+        let value_field = Field::new(
+            "value",
+            DataType::Timestamp(TimeUnit::Nanosecond, None),
+            true,
+        )
+        .with_metadata(field_id_meta(3));
+        let entries_field = Field::new(
+            "key_value",
+            DataType::Struct(vec![key_field, value_field].into()),
+            false,
         );
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new(
+                "ts_map",
+                DataType::Map(Arc::new(entries_field), false),
+                true,
+            )
+            .with_metadata(field_id_meta(1)),
+        ]));
+
+        let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap();
+        let value_dt = match coerced.field(0).data_type() {
+            DataType::Map(entries, _) => match entries.data_type() {
+                DataType::Struct(fields) => fields[1].data_type().clone(),
+                other => panic!("Expected Struct inside Map, got {other}"),
+            },
+            other => panic!("Expected Map, got {other}"),
+        };
+        assert_eq!(value_dt, DataType::Timestamp(TimeUnit::Microsecond, None));
     }
 }
diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs
index 8eba4ec6c9..8cbd4a96c1 100644
--- a/crates/iceberg/src/arrow/reader.rs
+++ b/crates/iceberg/src/arrow/reader.rs
@@ -4689,4 +4689,557 @@ message schema {
         assert_eq!(result[1], expected_1);
         assert_eq!(result[2], expected_2);
     }
+
+    // INT96 encoding: [nanos_low_u32, nanos_high_u32, julian_day_u32]
+    // Julian day 2_440_588 = Unix epoch (1970-01-01)
+    const UNIX_EPOCH_JULIAN: i64 = 2_440_588;
+    const MICROS_PER_DAY: i64 = 86_400_000_000;
+    // Noon on 3333-01-01 (Julian day 2_953_529) — outside the i64 nanosecond range (~1677-2262).
+    const INT96_TEST_NANOS_WITHIN_DAY: u64 = 43_200_000_000_000;
+    const INT96_TEST_JULIAN_DAY: u32 = 2_953_529;
+
+    fn make_int96_test_value() -> (parquet::data_type::Int96, i64) {
+        let mut val = parquet::data_type::Int96::new();
+        val.set_data(
+            (INT96_TEST_NANOS_WITHIN_DAY & 0xFFFFFFFF) as u32,
+            (INT96_TEST_NANOS_WITHIN_DAY >> 32) as u32,
+            INT96_TEST_JULIAN_DAY,
+        );
+        let expected_micros = (INT96_TEST_JULIAN_DAY as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY
+            + (INT96_TEST_NANOS_WITHIN_DAY / 1_000) as i64;
+        (val, expected_micros)
+    }
+
+    async fn read_int96_batches(
+        file_path: &str,
+        schema: SchemaRef,
+        project_field_ids: Vec<i32>,
+    ) -> Vec<RecordBatch> {
+        let file_io = FileIO::new_with_fs();
+        let reader = ArrowReaderBuilder::new(file_io).build();
+
+        let file_size = std::fs::metadata(file_path).unwrap().len();
+        let task = FileScanTask {
+            file_size_in_bytes: file_size,
+            start: 0,
+            length: file_size,
+            record_count: None,
+            data_file_path: file_path.to_string(),
+            data_file_format: DataFileFormat::Parquet,
+            schema,
+            project_field_ids,
+            predicate: None,
+            deletes: vec![],
+            partition: None,
+            partition_spec: None,
+            name_mapping: None,
+            case_sensitive: false,
+        };
+
+        let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream;
+        reader.read(tasks).unwrap().try_collect().await.unwrap()
+    }
+
+    // ArrowWriter cannot write INT96, so we use SerializedFileWriter directly.
+    fn write_int96_parquet_file(
+        table_location: &str,
+        filename: &str,
+        with_field_ids: bool,
+    ) -> (String, Vec<i64>) {
+        use parquet::basic::{Repetition, Type as PhysicalType};
+        use parquet::data_type::{Int32Type, Int96, Int96Type};
+        use parquet::file::writer::SerializedFileWriter;
+        use parquet::schema::types::Type as SchemaType;
+
+        let file_path = format!("{table_location}/{filename}");
+
+        let mut ts_builder = SchemaType::primitive_type_builder("ts", PhysicalType::INT96)
+            .with_repetition(Repetition::OPTIONAL);
+        let mut id_builder = SchemaType::primitive_type_builder("id", PhysicalType::INT32)
+            .with_repetition(Repetition::REQUIRED);
+
+        if with_field_ids {
+            ts_builder = ts_builder.with_id(Some(1));
+            id_builder = id_builder.with_id(Some(2));
+        }
+
+        let schema = SchemaType::group_type_builder("schema")
+            .with_fields(vec![
+                Arc::new(ts_builder.build().unwrap()),
+                Arc::new(id_builder.build().unwrap()),
+            ])
+            .build()
+            .unwrap();
+
+        // Dates outside the i64 nanosecond range (~1677-2262) overflow without coercion.
+        const NOON_NANOS: u64 = INT96_TEST_NANOS_WITHIN_DAY;
+        const JULIAN_3333: u32 = INT96_TEST_JULIAN_DAY;
+        const JULIAN_2100: u32 = 2_488_070;
+
+        let test_data: Vec<(u32, u32, u32, i64)> = vec![
+            // 3333-01-01 00:00:00
+            (
+                0,
+                0,
+                JULIAN_3333,
+                (JULIAN_3333 as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY,
+            ),
+            // 3333-01-01 12:00:00
+            (
+                (NOON_NANOS & 0xFFFFFFFF) as u32,
+                (NOON_NANOS >> 32) as u32,
+                JULIAN_3333,
+                (JULIAN_3333 as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY
+                    + (NOON_NANOS / 1_000) as i64,
+            ),
+            // 2100-01-01 00:00:00
+            (
+                0,
+                0,
+                JULIAN_2100,
+                (JULIAN_2100 as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY,
+            ),
+        ];
+
+        let int96_values: Vec<Int96> = test_data
+            .iter()
+            .map(|(lo, hi, day, _)| {
+                let mut v = Int96::new();
+                v.set_data(*lo, *hi, *day);
+                v
+            })
+            .collect();
+
+        let id_values: Vec<i32> = (0..test_data.len() as i32).collect();
+        let expected_micros: Vec<i64> = test_data.iter().map(|(_, _, _, m)| *m).collect();
+
+        let file = File::create(&file_path).unwrap();
+        let mut writer =
+            SerializedFileWriter::new(file, Arc::new(schema), Default::default()).unwrap();
+
+        let mut row_group = writer.next_row_group().unwrap();
+        {
+            // def=1: ts is OPTIONAL and present. No repetition levels (top-level columns).
+            let mut col = row_group.next_column().unwrap().unwrap();
+            col.typed::<Int96Type>()
+                .write_batch(&int96_values, Some(&vec![1; test_data.len()]), None)
+                .unwrap();
+            col.close().unwrap();
+        }
+        {
+            let mut col = row_group.next_column().unwrap().unwrap();
+            col.typed::<Int32Type>()
+                .write_batch(&id_values, None, None)
+                .unwrap();
+            col.close().unwrap();
+        }
+        row_group.close().unwrap();
+        writer.close().unwrap();
+
+        (file_path, expected_micros)
+    }
+
+    async fn assert_int96_read_matches(
+        file_path: &str,
+        schema: SchemaRef,
+        project_field_ids: Vec<i32>,
+        expected_micros: &[i64],
+    ) {
+        use arrow_array::TimestampMicrosecondArray;
+
+        let batches = read_int96_batches(file_path, schema, project_field_ids).await;
+
+        assert_eq!(batches.len(), 1);
+        let ts_array = batches[0]
+            .column(0)
+            .as_any()
+            .downcast_ref::<TimestampMicrosecondArray>()
+            .expect("Expected TimestampMicrosecondArray");
+
+        for (i, expected) in expected_micros.iter().enumerate() {
+            assert_eq!(
+                ts_array.value(i),
+                *expected,
+                "Row {i}: got {}, expected {expected}",
+                ts_array.value(i)
+            );
+        }
+    }
+
+    #[tokio::test]
+    async fn test_read_int96_timestamps_with_field_ids() {
+        let schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::Timestamp))
+                        .into(),
+                    NestedField::required(2, "id", Type::Primitive(PrimitiveType::Int)).into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let (file_path, expected_micros) =
+            write_int96_parquet_file(&table_location, "with_ids.parquet", true);
+
+        assert_int96_read_matches(&file_path, schema, vec![1, 2], &expected_micros).await;
+    }
+
+    #[tokio::test]
+    async fn test_read_int96_timestamps_without_field_ids() {
+        let schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::Timestamp))
+                        .into(),
+                    NestedField::required(2, "id", Type::Primitive(PrimitiveType::Int)).into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let (file_path, expected_micros) =
+            write_int96_parquet_file(&table_location, "no_ids.parquet", false);
+
+        assert_int96_read_matches(&file_path, schema, vec![1, 2], &expected_micros).await;
+    }
+
+    #[tokio::test]
+    async fn test_read_int96_timestamps_in_struct() {
+        use arrow_array::{StructArray, TimestampMicrosecondArray};
+        use parquet::basic::{Repetition, Type as PhysicalType};
+        use parquet::data_type::Int96Type;
+        use parquet::file::writer::SerializedFileWriter;
+        use parquet::schema::types::Type as SchemaType;
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_path = format!("{table_location}/struct_int96.parquet");
+
+        let ts_type = SchemaType::primitive_type_builder("ts", PhysicalType::INT96)
+            .with_repetition(Repetition::OPTIONAL)
+            .with_id(Some(2))
+            .build()
+            .unwrap();
+
+        let struct_type = SchemaType::group_type_builder("data")
+            .with_repetition(Repetition::REQUIRED)
+            .with_id(Some(1))
+            .with_fields(vec![Arc::new(ts_type)])
+            .build()
+            .unwrap();
+
+        let parquet_schema = SchemaType::group_type_builder("schema")
+            .with_fields(vec![Arc::new(struct_type)])
+            .build()
+            .unwrap();
+
+        let (int96_val, expected_micros) = make_int96_test_value();
+
+        let file = File::create(&file_path).unwrap();
+        let mut writer =
+            SerializedFileWriter::new(file, Arc::new(parquet_schema), Default::default()).unwrap();
+
+        // def=1: struct is REQUIRED so no level, ts is OPTIONAL and present (1).
+        // No repetition levels needed (no repeated groups).
+        let mut row_group = writer.next_row_group().unwrap();
+        {
+            let mut col = row_group.next_column().unwrap().unwrap();
+            col.typed::<Int96Type>()
+                .write_batch(&[int96_val], Some(&[1]), None)
+                .unwrap();
+            col.close().unwrap();
+        }
+        row_group.close().unwrap();
+        writer.close().unwrap();
+
+        let iceberg_schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::required(
+                        1,
+                        "data",
+                        Type::Struct(crate::spec::StructType::new(vec![
+                            NestedField::optional(
+                                2,
+                                "ts",
+                                Type::Primitive(PrimitiveType::Timestamp),
+                            )
+                            .into(),
+                        ])),
+                    )
+                    .into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let batches = read_int96_batches(&file_path, iceberg_schema, vec![1]).await;
+
+        assert_eq!(batches.len(), 1);
+        let struct_array = batches[0]
+            .column(0)
+            .as_any()
+            .downcast_ref::<StructArray>()
+            .expect("Expected StructArray");
+        let ts_array = struct_array
+            .column(0)
+            .as_any()
+            .downcast_ref::<TimestampMicrosecondArray>()
+            .expect("Expected TimestampMicrosecondArray inside struct");
+
+        assert_eq!(
+            ts_array.value(0),
+            expected_micros,
+            "INT96 in struct: got {}, expected {expected_micros}",
+            ts_array.value(0)
+        );
+    }
+
+    #[tokio::test]
+    async fn test_read_int96_timestamps_in_list() {
+        use arrow_array::{ListArray, TimestampMicrosecondArray};
+        use parquet::basic::{Repetition, Type as PhysicalType};
+        use parquet::data_type::Int96Type;
+        use parquet::file::writer::SerializedFileWriter;
+        use parquet::schema::types::Type as SchemaType;
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_path = format!("{table_location}/list_int96.parquet");
+
+        // 3-level LIST encoding:
+        //   optional group timestamps (LIST) {
+        //     repeated group list {
+        //       optional int96 element;
+        //     }
+        //   }
+        let element_type = SchemaType::primitive_type_builder("element", PhysicalType::INT96)
+            .with_repetition(Repetition::OPTIONAL)
+            .with_id(Some(2))
+            .build()
+            .unwrap();
+
+        let list_group = SchemaType::group_type_builder("list")
+            .with_repetition(Repetition::REPEATED)
+            .with_fields(vec![Arc::new(element_type)])
+            .build()
+            .unwrap();
+
+        let list_type = SchemaType::group_type_builder("timestamps")
+            .with_repetition(Repetition::OPTIONAL)
+            .with_id(Some(1))
+            .with_logical_type(Some(parquet::basic::LogicalType::List))
+            .with_fields(vec![Arc::new(list_group)])
+            .build()
+            .unwrap();
+
+        let parquet_schema = SchemaType::group_type_builder("schema")
+            .with_fields(vec![Arc::new(list_type)])
+            .build()
+            .unwrap();
+
+        let (int96_val, expected_micros) = make_int96_test_value();
+
+        let file = File::create(&file_path).unwrap();
+        let mut writer =
+            SerializedFileWriter::new(file, Arc::new(parquet_schema), Default::default()).unwrap();
+
+        // Write a single row with a list containing one INT96 element.
+        // def=3: list present (1) + repeated group (2) + element present (3)
+        // rep=0: start of a new list
+        let mut row_group = writer.next_row_group().unwrap();
+        {
+            let mut col = row_group.next_column().unwrap().unwrap();
+            col.typed::<Int96Type>()
+                .write_batch(&[int96_val], Some(&[3]), Some(&[0]))
+                .unwrap();
+            col.close().unwrap();
+        }
+        row_group.close().unwrap();
+        writer.close().unwrap();
+
+        let iceberg_schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::optional(
+                        1,
+                        "timestamps",
+                        Type::List(crate::spec::ListType {
+                            element_field: NestedField::optional(
+                                2,
+                                "element",
+                                Type::Primitive(PrimitiveType::Timestamp),
+                            )
+                            .into(),
+                        }),
+                    )
+                    .into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let batches = read_int96_batches(&file_path, iceberg_schema, vec![1]).await;
+
+        assert_eq!(batches.len(), 1);
+        let list_array = batches[0]
+            .column(0)
+            .as_any()
+            .downcast_ref::<ListArray>()
+            .expect("Expected ListArray");
+        let ts_array = list_array
+            .values()
+            .as_any()
+            .downcast_ref::<TimestampMicrosecondArray>()
+            .expect("Expected TimestampMicrosecondArray inside list");
+
+        assert_eq!(
+            ts_array.value(0),
+            expected_micros,
+            "INT96 in list: got {}, expected {expected_micros}",
+            ts_array.value(0)
+        );
+    }
+
+    #[tokio::test]
+    async fn test_read_int96_timestamps_in_map() {
+        use arrow_array::{MapArray, TimestampMicrosecondArray};
+        use parquet::basic::{Repetition, Type as PhysicalType};
+        use parquet::data_type::{ByteArrayType, Int96Type};
+        use parquet::file::writer::SerializedFileWriter;
+        use parquet::schema::types::Type as SchemaType;
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_path = format!("{table_location}/map_int96.parquet");
+
+        // MAP encoding:
+        //   optional group ts_map (MAP) {
+        //     repeated group key_value {
+        //       required binary key (UTF8);
+        //       optional int96 value;
+        //     }
+        //   }
+        let key_type = SchemaType::primitive_type_builder("key", PhysicalType::BYTE_ARRAY)
+            .with_repetition(Repetition::REQUIRED)
+            .with_logical_type(Some(parquet::basic::LogicalType::String))
+            .with_id(Some(2))
+            .build()
+            .unwrap();
+
+        let value_type = SchemaType::primitive_type_builder("value", PhysicalType::INT96)
+            .with_repetition(Repetition::OPTIONAL)
+            .with_id(Some(3))
+            .build()
+            .unwrap();
+
+        let key_value_group = SchemaType::group_type_builder("key_value")
+            .with_repetition(Repetition::REPEATED)
+            .with_fields(vec![Arc::new(key_type), Arc::new(value_type)])
+            .build()
+            .unwrap();
+
+        let map_type = SchemaType::group_type_builder("ts_map")
+            .with_repetition(Repetition::OPTIONAL)
+            .with_id(Some(1))
+            .with_logical_type(Some(parquet::basic::LogicalType::Map))
+            .with_fields(vec![Arc::new(key_value_group)])
+            .build()
+            .unwrap();
+
+        let parquet_schema = SchemaType::group_type_builder("schema")
+            .with_fields(vec![Arc::new(map_type)])
+            .build()
+            .unwrap();
+
+        let (int96_val, expected_micros) = make_int96_test_value();
+
+        let file = File::create(&file_path).unwrap();
+        let mut writer =
+            SerializedFileWriter::new(file, Arc::new(parquet_schema), Default::default()).unwrap();
+
+        // Write a single row with a map containing one key-value pair.
+        // rep=0 for both columns: start of a new map.
+        // key def=2: map present (1) + key_value entry present (2), key is REQUIRED.
+        // value def=3: map present (1) + key_value entry present (2) + value present (3).
+        let mut row_group = writer.next_row_group().unwrap();
+        {
+            let mut col = row_group.next_column().unwrap().unwrap();
+            col.typed::<ByteArrayType>()
+                .write_batch(
+                    &[parquet::data_type::ByteArray::from("event_time")],
+                    Some(&[2]),
+                    Some(&[0]),
+                )
+                .unwrap();
+            col.close().unwrap();
+        }
+        {
+            let mut col = row_group.next_column().unwrap().unwrap();
+            col.typed::<Int96Type>()
+                .write_batch(&[int96_val], Some(&[3]), Some(&[0]))
+                .unwrap();
+            col.close().unwrap();
+        }
+        row_group.close().unwrap();
+        writer.close().unwrap();
+
+        let iceberg_schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::optional(
+                        1,
+                        "ts_map",
+                        Type::Map(crate::spec::MapType {
+                            key_field: NestedField::required(
+                                2,
+                                "key",
+                                Type::Primitive(PrimitiveType::String),
+                            )
+                            .into(),
+                            value_field: NestedField::optional(
+                                3,
+                                "value",
+                                Type::Primitive(PrimitiveType::Timestamp),
+                            )
+                            .into(),
+                        }),
+                    )
+                    .into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let batches = read_int96_batches(&file_path, iceberg_schema, vec![1]).await;
+
+        assert_eq!(batches.len(), 1);
+        let map_array = batches[0]
+            .column(0)
+            .as_any()
+            .downcast_ref::<MapArray>()
+            .expect("Expected MapArray");
+        let ts_array = map_array
+            .values()
+            .as_any()
+            .downcast_ref::<TimestampMicrosecondArray>()
+            .expect("Expected TimestampMicrosecondArray as map values");
+
+        assert_eq!(
+            ts_array.value(0),
+            expected_micros,
+            "INT96 in map: got {}, expected {expected_micros}",
+            ts_array.value(0)
+        );
+    }
 }