Skip to content

Commit 0506356

Browse files
committed
Add as_arrow() to Schema class
1 parent b447461 commit 0506356

File tree

3 files changed

+43
-0
lines changed

3 files changed

+43
-0
lines changed

mkdocs/docs/api.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,25 @@ long: [[4.896029,-122.431297,6.0989,2.349014],[6.56667]]
295295

296296
The nested lists represent the different Arrow buffers: the first write produces one buffer, and the second append produces a separate one. This is expected, since the data is read from two Parquet files.
297297

298+
299+
To avoid any type errors during writing, you can enforce the PyArrow table types using the Iceberg table schema:
300+
301+
```python
302+
import pyarrow as pa
from pyiceberg.catalog import load_catalog

# Load the target table and derive an Arrow schema from its Iceberg schema.
catalog = load_catalog("default")
table = catalog.load_table("default.cities")
schema = table.schema().as_arrow()

# Passing the derived schema makes PyArrow build the table with the
# column types of the Iceberg table instead of inferring them from the rows.
rows = [{"city": "Groningen", "lat": 53.21917, "long": 6.56667}]
df = pa.Table.from_pylist(rows, schema=schema)

table.append(df)
315+
```
316+
298317
<!-- prettier-ignore-start -->
299318

300319
!!! example "Under development"

pyiceberg/schema.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@
6464
)
6565

6666
if TYPE_CHECKING:
67+
import pyarrow as pa
68+
6769
from pyiceberg.table.name_mapping import (
6870
NameMapping,
6971
)
@@ -180,6 +182,12 @@ def as_struct(self) -> StructType:
180182
"""Return the schema as a struct."""
181183
return StructType(*self.fields)
182184

185+
def as_arrow(self) -> "pa.Schema":
    """Return the schema as an Arrow schema.

    The conversion is delegated to ``schema_to_pyarrow`` from
    ``pyiceberg.io.pyarrow``.

    Returns:
        The ``pa.Schema`` produced by ``schema_to_pyarrow`` for this schema.
    """
    # Function-local import: at module level pyarrow is only imported under
    # TYPE_CHECKING, presumably so that it stays an optional dependency.
    from pyiceberg.io.pyarrow import schema_to_pyarrow

    arrow_schema = schema_to_pyarrow(self)
    return arrow_schema
190+
183191
def find_field(self, name_or_id: Union[str, int], case_sensitive: bool = True) -> NestedField:
184192
"""Find a field using a field name or field ID.
185193

tests/test_schema.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1600,3 +1600,19 @@ def test_union_with_pa_schema(primitive_fields: NestedField) -> None:
16001600
)
16011601

16021602
assert new_schema == expected_schema
1603+
1604+
1605+
def test_arrow_schema() -> None:
    """Check that ``Schema.as_arrow()`` yields the expected Arrow schema."""
    iceberg_schema = Schema(
        NestedField(field_id=1, name="foo", field_type=StringType(), required=True),
        NestedField(field_id=2, name="bar", field_type=IntegerType(), required=False),
        NestedField(field_id=3, name="baz", field_type=BooleanType(), required=False),
    )

    # Expected mapping: the required field is non-nullable in Arrow,
    # the optional fields are nullable.
    arrow_fields = [
        pa.field("foo", pa.string(), nullable=False),
        pa.field("bar", pa.int32(), nullable=True),
        pa.field("baz", pa.bool_(), nullable=True),
    ]
    expected = pa.schema(arrow_fields)

    actual = iceberg_schema.as_arrow()
    assert actual == expected

0 commit comments

Comments
 (0)