Skip to content

Commit 219f46b

Browse files
committed
Merge branch 'main' of github.com:apache/iceberg-python into fd-rust
2 parents 84cfe0a + 58e5ad6 commit 219f46b

File tree

21 files changed

+730
-102
lines changed

21 files changed

+730
-102
lines changed

dev/docker-compose-integration.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ services:
5353
- CATALOG_WAREHOUSE=s3://warehouse/
5454
- CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
5555
- CATALOG_S3_ENDPOINT=http://minio:9000
56+
- CATALOG_JDBC_STRICT__MODE=true
5657
minio:
5758
image: minio/minio
5859
container_name: pyiceberg-minio

mkdocs/docs/community.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ Community discussions happen primarily on the [dev mailing list](https://lists.a
3030

3131
## Iceberg Community Events
3232

33-
The PyIceberg community sync is on the last Tuesday of every month. To join, make sure to subscribe to the [iceberg-python-sync Google group](https://groups.google.com/g/iceberg-python-sync).
33+
The PyIceberg community sync is on the last Tuesday of every month. The calendar event is located on the [Iceberg Dev Events](https://iceberg.apache.org/community#iceberg-community-events) calendar.
3434

3535
## Community Guidelines
3636

mkdocs/docs/configuration.md

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,9 @@ catalog:
359359

360360
#### Authentication Options
361361

362-
##### OAuth2
362+
##### Legacy OAuth2
363+
364+
Legacy OAuth2 properties will be removed in PyIceberg 1.0 in favor of the pluggable AuthManager properties described below.
363365

364366
| Key | Example | Description |
365367
| ------------------- | -------------------------------- | -------------------------------------------------------------------------------------------------- |
@@ -378,6 +380,77 @@ catalog:
378380
| rest.signing-region | us-east-1 | The region to use when SigV4 signing a request |
379381
| rest.signing-name | execute-api | The service signing name to use when SigV4 signing a request |
380382

383+
##### Pluggable Authentication via AuthManager
384+
385+
The RESTCatalog supports pluggable authentication via the `auth` configuration block. This allows you to specify how the access token will be fetched and managed for use with the HTTP requests to the RESTCatalog server. The authentication method is selected by setting the `auth.type` property, and additional configuration can be provided as needed for each method.
386+
387+
###### Supported Authentication Types
388+
389+
- `noop`: No authentication (no Authorization header sent).
390+
- `basic`: HTTP Basic authentication.
391+
- `custom`: Custom authentication manager (requires `auth.impl`).
392+
393+
###### Configuration Properties
394+
395+
The `auth` block is structured as follows:
396+
397+
```yaml
398+
catalog:
399+
default:
400+
type: rest
401+
uri: http://rest-catalog/ws/
402+
auth:
403+
type: <auth_type>
404+
<auth_type>:
405+
# Type-specific configuration
406+
impl: <custom_class_path> # Only for custom auth
407+
```
408+
409+
###### Property Reference
410+
411+
| Property | Required | Description |
412+
|------------------|----------|-------------------------------------------------------------------------------------------------|
413+
| `auth.type` | Yes | The authentication type to use (`noop`, `basic`, or `custom`). |
414+
| `auth.impl` | Conditionally | The fully qualified class path for a custom AuthManager. Required if `auth.type` is `custom`. |
415+
| `auth.basic` | If type is `basic` | Block containing `username` and `password` for HTTP Basic authentication. |
416+
| `auth.custom` | If type is `custom` | Block containing configuration for the custom AuthManager. |
417+
418+
###### Examples
419+
420+
No Authentication:
421+
422+
```yaml
423+
auth:
424+
type: noop
425+
```
426+
427+
Basic Authentication:
428+
429+
```yaml
430+
auth:
431+
type: basic
432+
basic:
433+
username: myuser
434+
password: mypass
435+
```
436+
437+
Custom Authentication:
438+
439+
```yaml
440+
auth:
441+
type: custom
442+
impl: mypackage.module.MyAuthManager
443+
custom:
444+
property1: value1
445+
property2: value2
446+
```
447+
448+
###### Notes
449+
450+
- If `auth.type` is `custom`, you **must** specify `auth.impl` with the full class path to your custom AuthManager.
451+
- If `auth.type` is not `custom`, specifying `auth.impl` is not allowed.
452+
- The configuration block under each type (e.g., `basic`, `custom`) is passed as keyword arguments to the corresponding AuthManager.
453+
381454
<!-- markdown-link-check-enable-->
382455

383456
#### Common Integrations & Examples

poetry.lock

Lines changed: 41 additions & 58 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyiceberg/catalog/hive.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -800,7 +800,7 @@ def update_namespace_properties(
800800
if removals:
801801
for key in removals:
802802
if key in parameters:
803-
parameters[key] = None
803+
parameters.pop(key)
804804
removed.add(key)
805805
if updates:
806806
for key, value in updates.items():

pyiceberg/catalog/rest/__init__.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,8 @@ class IdentifierKind(Enum):
134134
SIGV4_SERVICE = "rest.signing-name"
135135
OAUTH2_SERVER_URI = "oauth2-server-uri"
136136
SNAPSHOT_LOADING_MODE = "snapshot-loading-mode"
137+
AUTH = "auth"
138+
CUSTOM = "custom"
137139

138140
NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8)
139141

@@ -247,7 +249,23 @@ def _create_session(self) -> Session:
247249
elif ssl_client_cert := ssl_client.get(CERT):
248250
session.cert = ssl_client_cert
249251

250-
session.auth = AuthManagerAdapter(self._create_legacy_oauth2_auth_manager(session))
252+
if auth_config := self.properties.get(AUTH):
253+
auth_type = auth_config.get("type")
254+
if auth_type is None:
255+
raise ValueError("auth.type must be defined")
256+
auth_type_config = auth_config.get(auth_type, {})
257+
auth_impl = auth_config.get("impl")
258+
259+
if auth_type == CUSTOM and not auth_impl:
260+
raise ValueError("auth.impl must be specified when using custom auth.type")
261+
262+
if auth_type != CUSTOM and auth_impl:
263+
raise ValueError("auth.impl can only be specified when using custom auth.type")
264+
265+
session.auth = AuthManagerAdapter(AuthManagerFactory.create(auth_impl or auth_type, auth_type_config))
266+
else:
267+
session.auth = AuthManagerAdapter(self._create_legacy_oauth2_auth_manager(session))
268+
251269
# Set HTTP headers
252270
self._config_headers(session)
253271

@@ -505,7 +523,7 @@ def _create_table(
505523
try:
506524
response.raise_for_status()
507525
except HTTPError as exc:
508-
_handle_non_200_response(exc, {409: TableAlreadyExistsError})
526+
_handle_non_200_response(exc, {409: TableAlreadyExistsError, 404: NoSuchNamespaceError})
509527
return TableResponse.model_validate_json(response.text)
510528

511529
@retry(**_RETRY_ARGS)

pyiceberg/catalog/rest/auth.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,15 @@ def auth_header(self) -> Optional[str]:
4242

4343

4444
class NoopAuthManager(AuthManager):
45+
"""Auth Manager implementation with no auth."""
46+
4547
def auth_header(self) -> Optional[str]:
4648
return None
4749

4850

4951
class BasicAuthManager(AuthManager):
52+
"""AuthManager implementation that supports basic password auth."""
53+
5054
def __init__(self, username: str, password: str):
5155
credentials = f"{username}:{password}"
5256
self._token = base64.b64encode(credentials.encode()).decode()
@@ -56,6 +60,12 @@ def auth_header(self) -> str:
5660

5761

5862
class LegacyOAuth2AuthManager(AuthManager):
63+
"""Legacy OAuth2 AuthManager implementation.
64+
65+
This class exists for backward compatibility, and will be removed in
66+
PyIceberg 1.0.0 in favor of OAuth2AuthManager.
67+
"""
68+
5969
_session: Session
6070
_auth_url: Optional[str]
6171
_token: Optional[str]

pyiceberg/io/pyarrow.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2410,9 +2410,12 @@ def data_file_statistics_from_parquet_metadata(
24102410
continue
24112411

24122412
if field_id not in col_aggs:
2413-
col_aggs[field_id] = StatsAggregator(
2414-
stats_col.iceberg_type, statistics.physical_type, stats_col.mode.length
2415-
)
2413+
try:
2414+
col_aggs[field_id] = StatsAggregator(
2415+
stats_col.iceberg_type, statistics.physical_type, stats_col.mode.length
2416+
)
2417+
except ValueError as e:
2418+
raise ValueError(f"{e} for column '{stats_col.column_name}'") from e
24162419

24172420
if isinstance(stats_col.iceberg_type, DecimalType) and statistics.physical_type != "FIXED_LEN_BYTE_ARRAY":
24182421
scale = stats_col.iceberg_type.scale

pyiceberg/table/__init__.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1702,7 +1702,14 @@ def to_polars(self) -> pl.DataFrame: ...
17021702

17031703
def update(self: S, **overrides: Any) -> S:
17041704
"""Create a copy of this table scan with updated fields."""
1705-
return type(self)(**{**self.__dict__, **overrides})
1705+
from inspect import signature
1706+
1707+
# Extract those attributes that are constructor parameters. We don't use self.__dict__ as the kwargs to the
1708+
# constructors because it may contain additional attributes that are not part of the constructor signature.
1709+
params = signature(type(self).__init__).parameters.keys() - {"self"} # Skip "self" parameter
1710+
kwargs = {param: getattr(self, param) for param in params} # Assume parameters are attributes
1711+
1712+
return type(self)(**{**kwargs, **overrides})
17061713

17071714
def use_ref(self: S, name: str) -> S:
17081715
if self.snapshot_id:

pyiceberg/transforms.py

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717

1818
import base64
1919
import datetime as py_datetime
20+
import importlib
2021
import struct
22+
import types
2123
from abc import ABC, abstractmethod
2224
from enum import IntEnum
2325
from functools import singledispatch
@@ -28,6 +30,7 @@
2830
import mmh3
2931
from pydantic import Field, PositiveInt, PrivateAttr
3032

33+
from pyiceberg.exceptions import NotInstalledError
3134
from pyiceberg.expressions import (
3235
BoundEqualTo,
3336
BoundGreaterThan,
@@ -106,6 +109,17 @@
106109
TRUNCATE_PARSER = ParseNumberFromBrackets(TRUNCATE)
107110

108111

112+
def _try_import(module_name: str, extras_name: Optional[str] = None) -> types.ModuleType:
113+
try:
114+
return importlib.import_module(module_name)
115+
except ImportError:
116+
if extras_name:
117+
msg = f'{module_name} needs to be installed. pip install "pyiceberg[{extras_name}]"'
118+
else:
119+
msg = f"{module_name} needs to be installed."
120+
raise NotInstalledError(msg) from None
121+
122+
109123
def _transform_literal(func: Callable[[L], L], lit: Literal[L]) -> Literal[L]:
110124
"""Small helper to upwrap the value from the literal, and wrap it again."""
111125
return literal(func(lit.value))
@@ -382,8 +396,7 @@ def __repr__(self) -> str:
382396
return f"BucketTransform(num_buckets={self._num_buckets})"
383397

384398
def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]":
385-
from pyiceberg_core import transform as pyiceberg_core_transform
386-
399+
pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg-core").transform
387400
return _pyiceberg_transform_wrapper(pyiceberg_core_transform.bucket, self._num_buckets)
388401

389402
@property
@@ -509,9 +522,8 @@ def __repr__(self) -> str:
509522
return "YearTransform()"
510523

511524
def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]":
512-
import pyarrow as pa
513-
from pyiceberg_core import transform as pyiceberg_core_transform
514-
525+
pa = _try_import("pyarrow")
526+
pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg-core").transform
515527
return _pyiceberg_transform_wrapper(pyiceberg_core_transform.year, expected_type=pa.int32())
516528

517529

@@ -570,8 +582,8 @@ def __repr__(self) -> str:
570582
return "MonthTransform()"
571583

572584
def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]":
573-
import pyarrow as pa
574-
from pyiceberg_core import transform as pyiceberg_core_transform
585+
pa = _try_import("pyarrow")
586+
pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg-core").transform
575587

576588
return _pyiceberg_transform_wrapper(pyiceberg_core_transform.month, expected_type=pa.int32())
577589

@@ -639,8 +651,8 @@ def __repr__(self) -> str:
639651
return "DayTransform()"
640652

641653
def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]":
642-
import pyarrow as pa
643-
from pyiceberg_core import transform as pyiceberg_core_transform
654+
pa = _try_import("pyarrow", extras_name="pyarrow")
655+
pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg-core").transform
644656

645657
return _pyiceberg_transform_wrapper(pyiceberg_core_transform.day, expected_type=pa.int32())
646658

@@ -692,7 +704,7 @@ def __repr__(self) -> str:
692704
return "HourTransform()"
693705

694706
def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]":
695-
from pyiceberg_core import transform as pyiceberg_core_transform
707+
pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg-core").transform
696708

697709
return _pyiceberg_transform_wrapper(pyiceberg_core_transform.hour)
698710

@@ -915,7 +927,7 @@ def __repr__(self) -> str:
915927
return f"TruncateTransform(width={self._width})"
916928

917929
def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]":
918-
from pyiceberg_core import transform as pyiceberg_core_transform
930+
pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg-core").transform
919931

920932
return _pyiceberg_transform_wrapper(pyiceberg_core_transform.truncate, self._width)
921933

0 commit comments

Comments
 (0)