Skip to content

Commit 23e2cb2

Browse files
committed
Test written and linter run
1 parent 960c4aa commit 23e2cb2

File tree

6 files changed

+86
-24
lines changed

6 files changed

+86
-24
lines changed

pyiceberg/catalog/__init__.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -744,7 +744,9 @@ def _load_file_io(self, properties: Properties = EMPTY_DICT, location: Optional[
744744
return load_file_io({**self.properties, **properties}, location)
745745

746746
@staticmethod
747-
def _convert_schema_if_needed(schema: Union[Schema, "pa.Schema"], format_version: TableVersion = TableProperties.DEFAULT_FORMAT_VERSION) -> Schema:
747+
def _convert_schema_if_needed(
748+
schema: Union[Schema, "pa.Schema"], format_version: TableVersion = TableProperties.DEFAULT_FORMAT_VERSION
749+
) -> Schema:
748750
if isinstance(schema, Schema):
749751
return schema
750752
try:
@@ -755,7 +757,10 @@ def _convert_schema_if_needed(schema: Union[Schema, "pa.Schema"], format_version
755757
downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
756758
if isinstance(schema, pa.Schema):
757759
schema: Schema = visit_pyarrow( # type: ignore
758-
schema, _ConvertToIcebergWithoutIDs(downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us, format_version=format_version)
760+
schema,
761+
_ConvertToIcebergWithoutIDs(
762+
downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us, format_version=format_version
763+
),
759764
)
760765
return schema
761766
except ModuleNotFoundError:
@@ -848,7 +853,9 @@ def _create_staged_table(
848853
Returns:
849854
StagedTable: the created staged table instance.
850855
"""
851-
schema: Schema = self._convert_schema_if_needed(schema, properties.get(TableProperties.FORMAT_VERSION, TableProperties.DEFAULT_FORMAT_VERSION)) # type: ignore
856+
schema: Schema = self._convert_schema_if_needed(
857+
schema, int(properties.get(TableProperties.FORMAT_VERSION, TableProperties.DEFAULT_FORMAT_VERSION))
858+
) # type: ignore
852859

853860
database_name, table_name = self.identifier_to_database_and_table(identifier)
854861

pyiceberg/catalog/rest/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,9 @@ def _create_table(
499499
properties: Properties = EMPTY_DICT,
500500
stage_create: bool = False,
501501
) -> TableResponse:
502-
iceberg_schema = self._convert_schema_if_needed(schema, properties.get(TableProperties.FORMAT_VERSION, TableProperties.DEFAULT_FORMAT_VERSION))
502+
iceberg_schema = self._convert_schema_if_needed(
503+
schema, int(properties.get(TableProperties.FORMAT_VERSION, TableProperties.DEFAULT_FORMAT_VERSION))
504+
)
503505
fresh_schema = assign_fresh_schema_ids(iceberg_schema)
504506
fresh_partition_spec = assign_fresh_partition_spec_ids(partition_spec, iceberg_schema, fresh_schema)
505507
fresh_sort_order = assign_fresh_sort_order_ids(sort_order, iceberg_schema, fresh_schema)

pyiceberg/io/pyarrow.py

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1019,22 +1019,36 @@ def _combine_positional_deletes(positional_deletes: List[pa.ChunkedArray], start
10191019

10201020

10211021
def pyarrow_to_schema(
1022-
schema: pa.Schema, name_mapping: Optional[NameMapping] = None, downcast_ns_timestamp_to_us: bool = False, format_version: TableVersion = TableProperties.DEFAULT_FORMAT_VERSION
1022+
schema: pa.Schema,
1023+
name_mapping: Optional[NameMapping] = None,
1024+
downcast_ns_timestamp_to_us: bool = False,
1025+
format_version: TableVersion = TableProperties.DEFAULT_FORMAT_VERSION,
10231026
) -> Schema:
10241027
has_ids = visit_pyarrow(schema, _HasIds())
10251028
if has_ids:
1026-
return visit_pyarrow(schema, _ConvertToIceberg(downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us, format_version=format_version))
1029+
return visit_pyarrow(
1030+
schema, _ConvertToIceberg(downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us, format_version=format_version)
1031+
)
10271032
elif name_mapping is not None:
1028-
schema_without_ids = _pyarrow_to_schema_without_ids(schema, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us, format_version=format_version)
1033+
schema_without_ids = _pyarrow_to_schema_without_ids(
1034+
schema, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us, format_version=format_version
1035+
)
10291036
return apply_name_mapping(schema_without_ids, name_mapping)
10301037
else:
10311038
raise ValueError(
10321039
"Parquet file does not have field-ids and the Iceberg table does not have 'schema.name-mapping.default' defined"
10331040
)
10341041

10351042

1036-
def _pyarrow_to_schema_without_ids(schema: pa.Schema, downcast_ns_timestamp_to_us: bool = False, format_version: TableVersion = TableProperties.DEFAULT_FORMAT_VERSION) -> Schema:
1037-
return visit_pyarrow(schema, _ConvertToIcebergWithoutIDs(downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us, format_version=format_version))
1043+
def _pyarrow_to_schema_without_ids(
1044+
schema: pa.Schema,
1045+
downcast_ns_timestamp_to_us: bool = False,
1046+
format_version: TableVersion = TableProperties.DEFAULT_FORMAT_VERSION,
1047+
) -> Schema:
1048+
return visit_pyarrow(
1049+
schema,
1050+
_ConvertToIcebergWithoutIDs(downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us, format_version=format_version),
1051+
)
10381052

10391053

10401054
def _pyarrow_schema_ensure_large_types(schema: pa.Schema) -> pa.Schema:
@@ -1113,7 +1127,7 @@ def _(obj: pa.Field, visitor: PyArrowSchemaVisitor[T]) -> T:
11131127
visitor.before_field(obj)
11141128
try:
11151129
if obj.name == "timestamp_ns":
1116-
print('alexstephen')
1130+
print("alexstephen")
11171131
result = visit_pyarrow(field_type, visitor)
11181132
except TypeError as e:
11191133
raise UnsupportedPyArrowTypeException(obj, f"Column '{obj.name}' has an unsupported type: {field_type}") from e
@@ -1218,7 +1232,9 @@ class _ConvertToIceberg(PyArrowSchemaVisitor[Union[IcebergType, Schema]]):
12181232

12191233
_field_names: List[str]
12201234

1221-
def __init__(self, downcast_ns_timestamp_to_us: bool = False, format_version: TableVersion = TableProperties.DEFAULT_FORMAT_VERSION) -> None: # noqa: F821
1235+
def __init__(
1236+
self, downcast_ns_timestamp_to_us: bool = False, format_version: TableVersion = TableProperties.DEFAULT_FORMAT_VERSION
1237+
) -> None: # noqa: F821
12221238
self._field_names = []
12231239
self._downcast_ns_timestamp_to_us = downcast_ns_timestamp_to_us
12241240
self._format_version = format_version
@@ -2549,8 +2565,10 @@ def bin_pack_arrow_table(tbl: pa.Table, target_file_size: int) -> Iterator[List[
25492565

25502566

25512567
def _check_pyarrow_schema_compatible(
2552-
requested_schema: Schema, provided_schema: pa.Schema, downcast_ns_timestamp_to_us: bool = False,
2553-
format_version: TableVersion = TableProperties.DEFAULT_FORMAT_VERSION
2568+
requested_schema: Schema,
2569+
provided_schema: pa.Schema,
2570+
downcast_ns_timestamp_to_us: bool = False,
2571+
format_version: TableVersion = TableProperties.DEFAULT_FORMAT_VERSION,
25542572
) -> None:
25552573
"""
25562574
Check if the `requested_schema` is compatible with `provided_schema`.
@@ -2563,10 +2581,15 @@ def _check_pyarrow_schema_compatible(
25632581
name_mapping = requested_schema.name_mapping
25642582
try:
25652583
provided_schema = pyarrow_to_schema(
2566-
provided_schema, name_mapping=name_mapping, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us, format_version=format_version
2584+
provided_schema,
2585+
name_mapping=name_mapping,
2586+
downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us,
2587+
format_version=format_version,
25672588
)
25682589
except ValueError as e:
2569-
provided_schema = _pyarrow_to_schema_without_ids(provided_schema, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us, format_version=format_version)
2590+
provided_schema = _pyarrow_to_schema_without_ids(
2591+
provided_schema, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us, format_version=format_version
2592+
)
25702593
additional_names = set(provided_schema._name_to_id.keys()) - set(requested_schema._name_to_id.keys())
25712594
raise ValueError(
25722595
f"PyArrow table contains more columns: {', '.join(sorted(additional_names))}. Update the schema first (hint, use union_by_name)."
@@ -2683,7 +2706,12 @@ def _dataframe_to_data_files(
26832706
)
26842707
name_mapping = table_metadata.schema().name_mapping
26852708
downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
2686-
task_schema = pyarrow_to_schema(df.schema, name_mapping=name_mapping, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us, format_version=table_metadata.format_version)
2709+
task_schema = pyarrow_to_schema(
2710+
df.schema,
2711+
name_mapping=name_mapping,
2712+
downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us,
2713+
format_version=table_metadata.format_version,
2714+
)
26872715

26882716
if table_metadata.spec().is_unpartitioned():
26892717
yield from write_file(

pyiceberg/table/__init__.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -477,8 +477,10 @@ def append(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT,
477477
)
478478
downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
479479
_check_pyarrow_schema_compatible(
480-
self.table_metadata.schema(), provided_schema=df.schema, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us,
481-
format_version=self.table_metadata.format_version
480+
self.table_metadata.schema(),
481+
provided_schema=df.schema,
482+
downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us,
483+
format_version=self.table_metadata.format_version,
482484
)
483485

484486
with self._append_snapshot_producer(snapshot_properties, branch=branch) as append_files:
@@ -528,8 +530,10 @@ def dynamic_partition_overwrite(
528530

529531
downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
530532
_check_pyarrow_schema_compatible(
531-
self.table_metadata.schema(), provided_schema=df.schema, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us,
532-
format_version=self.table_metadata.format_version
533+
self.table_metadata.schema(),
534+
provided_schema=df.schema,
535+
downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us,
536+
format_version=self.table_metadata.format_version,
533537
)
534538

535539
# If dataframe does not have data, there is no need to overwrite
@@ -595,8 +599,10 @@ def overwrite(
595599
)
596600
downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
597601
_check_pyarrow_schema_compatible(
598-
self.table_metadata.schema(), provided_schema=df.schema, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us,
599-
format_version=self.table_metadata.format_version
602+
self.table_metadata.schema(),
603+
provided_schema=df.schema,
604+
downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us,
605+
format_version=self.table_metadata.format_version,
600606
)
601607

602608
if overwrite_filter != AlwaysFalse():
@@ -792,8 +798,10 @@ def upsert(
792798

793799
downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
794800
_check_pyarrow_schema_compatible(
795-
self.table_metadata.schema(), provided_schema=df.schema, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us,
796-
format_version=self.table_metadata.format_version
801+
self.table_metadata.schema(),
802+
provided_schema=df.schema,
803+
downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us,
804+
format_version=self.table_metadata.format_version,
797805
)
798806

799807
# get list of rows that exist so we don't have to load the entire target table

tests/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2830,6 +2830,7 @@ def arrow_table_schema_with_nanoseconds_timestamp_precisions() -> "pa.Schema":
28302830
]
28312831
)
28322832

2833+
28332834
@pytest.fixture(scope="session")
28342835
def table_schema_with_all_microseconds_timestamp_precision() -> Schema:
28352836
"""Iceberg table Schema with only date, timestamp and timestamptz values."""

tests/integration/test_writes/test_writes.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1355,6 +1355,7 @@ def test_write_all_timestamp_precision(
13551355
# and supports up to microsecond precision
13561356
assert left.timestamp() == right.timestamp(), f"Difference in column {column}: {left} != {right}"
13571357

1358+
13581359
@pytest.mark.integration
13591360
@pytest.mark.parametrize("format_version", [1, 2])
13601361
def test_merge_manifests(session_catalog: Catalog, arrow_table_with_null: pa.Table, format_version: int) -> None:
@@ -2114,3 +2115,18 @@ def test_branch_py_write_spark_read(session_catalog: Catalog, spark: SparkSessio
21142115
)
21152116
assert main_df.count() == 3
21162117
assert branch_df.count() == 2
2118+
2119+
2120+
@pytest.mark.integration
2121+
def test_nanosecond_support_on_catalog(session_catalog: Catalog) -> None:
2122+
identifier = "default.test_nanosecond_support_on_catalog"
2123+
# Create a pyarrow table with a nanosecond timestamp column
2124+
table = pa.Table.from_arrays(
2125+
[
2126+
pa.array([datetime.now()], type=pa.timestamp("ns")),
2127+
pa.array([datetime.now()], type=pa.timestamp("ns", tz="America/New_York")),
2128+
],
2129+
names=["timestamp_ns", "timestamptz_ns"],
2130+
)
2131+
2132+
_create_table(session_catalog, identifier, {"format-version": "3"}, schema=table.schema)

0 commit comments

Comments
 (0)