Skip to content

Commit 5cb8ee9

Browse files
committed
refactor Config / lazy loading
1 parent 09de790 commit 5cb8ee9

File tree

8 files changed

+31
-20
lines changed

8 files changed

+31
-20
lines changed

pyiceberg/catalog/__init__.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from collections.abc import Callable
2626
from dataclasses import dataclass
2727
from enum import Enum
28+
from functools import lru_cache
2829
from typing import (
2930
TYPE_CHECKING,
3031
Any,
@@ -74,7 +75,11 @@
7475

7576
logger = logging.getLogger(__name__)
7677

77-
_ENV_CONFIG = Config()
78+
79+
@lru_cache(maxsize=1)
80+
def _get_env_config() -> Config:
81+
return Config.load()
82+
7883

7984
TOKEN = "token"
8085
TYPE = "type"
@@ -243,9 +248,9 @@ def load_catalog(name: str | None = None, **properties: str | None) -> Catalog:
243248
or if it could not determine the catalog based on the properties.
244249
"""
245250
if name is None:
246-
name = _ENV_CONFIG.get_default_catalog_name()
251+
name = _get_env_config().get_default_catalog_name()
247252

248-
env = _ENV_CONFIG.get_catalog_config(name)
253+
env = _get_env_config().get_catalog_config(name)
249254
conf: RecursiveDict = merge_config(env or {}, cast(RecursiveDict, properties))
250255

251256
catalog_type: CatalogType | None
@@ -278,7 +283,7 @@ def load_catalog(name: str | None = None, **properties: str | None) -> Catalog:
278283

279284

280285
def list_catalogs() -> list[str]:
281-
return _ENV_CONFIG.get_known_catalogs()
286+
return _get_env_config().get_known_catalogs()
282287

283288

284289
def delete_files(io: FileIO, files_to_delete: set[str], file_type: str) -> None:
@@ -781,7 +786,7 @@ def _convert_schema_if_needed(
781786

782787
from pyiceberg.io.pyarrow import _ConvertToIcebergWithoutIDs, visit_pyarrow
783788

784-
downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
789+
downcast_ns_timestamp_to_us = Config.load().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
785790
if isinstance(schema, pa.Schema):
786791
schema: Schema = visit_pyarrow( # type: ignore
787792
schema,

pyiceberg/catalog/bigquery_metastore.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def __init__(self, name: str, **properties: str):
7171
raise ValueError(f"Missing property: {GCP_PROJECT_ID}")
7272

7373
# BigQuery requires current-snapshot-id to be present for tables to be created.
74-
if not Config().get_bool("legacy-current-snapshot-id"):
74+
if not Config.load().get_bool("legacy-current-snapshot-id"):
7575
raise ValueError("legacy-current-snapshot-id must be enabled to work with BigQuery.")
7676

7777
if credentials_file and credentials_info_str:

pyiceberg/io/pyarrow.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1744,7 +1744,7 @@ def __init__(
17441744
self._bound_row_filter = bind(table_metadata.schema(), row_filter, case_sensitive=case_sensitive)
17451745
self._case_sensitive = case_sensitive
17461746
self._limit = limit
1747-
self._downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE)
1747+
self._downcast_ns_timestamp_to_us = Config.load().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE)
17481748

17491749
@property
17501750
def _projected_field_ids(self) -> set[int]:
@@ -2685,7 +2685,7 @@ def write_parquet(task: WriteTask) -> DataFile:
26852685
else:
26862686
file_schema = table_schema
26872687

2688-
downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
2688+
downcast_ns_timestamp_to_us = Config.load().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
26892689
batches = [
26902690
_to_requested_schema(
26912691
requested_schema=file_schema,
@@ -2892,7 +2892,7 @@ def _dataframe_to_data_files(
28922892
default=TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT,
28932893
)
28942894
name_mapping = table_metadata.schema().name_mapping
2895-
downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
2895+
downcast_ns_timestamp_to_us = Config.load().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
28962896
task_schema = pyarrow_to_schema(
28972897
df.schema,
28982898
name_mapping=name_mapping,

pyiceberg/serializers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ def table_metadata(metadata: TableMetadata, output_file: OutputFile, overwrite:
129129
"""
130130
with output_file.create(overwrite=overwrite) as output_stream:
131131
# We need to serialize None values, in order to dump `None` current-snapshot-id as `-1`
132-
exclude_none = False if Config().get_bool("legacy-current-snapshot-id") else True
132+
exclude_none = False if Config.load().get_bool("legacy-current-snapshot-id") else True
133133

134134
json_bytes = metadata.model_dump_json(exclude_none=exclude_none).encode(UTF8)
135135
json_bytes = Compressor.get_compressor(output_file.location).bytes_compressor()(json_bytes)

pyiceberg/table/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,7 @@ def append(self, df: pa.Table, snapshot_properties: dict[str, str] = EMPTY_DICT,
469469
if not isinstance(df, pa.Table):
470470
raise ValueError(f"Expected PyArrow table, got: {df}")
471471

472-
downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
472+
downcast_ns_timestamp_to_us = Config.load().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
473473
_check_pyarrow_schema_compatible(
474474
self.table_metadata.schema(),
475475
provided_schema=df.schema,
@@ -523,7 +523,7 @@ def dynamic_partition_overwrite(
523523
f"in the latest partition spec: {field}"
524524
)
525525

526-
downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
526+
downcast_ns_timestamp_to_us = Config.load().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
527527
_check_pyarrow_schema_compatible(
528528
self.table_metadata.schema(),
529529
provided_schema=df.schema,
@@ -588,7 +588,7 @@ def overwrite(
588588
if not isinstance(df, pa.Table):
589589
raise ValueError(f"Expected PyArrow table, got: {df}")
590590

591-
downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
591+
downcast_ns_timestamp_to_us = Config.load().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
592592
_check_pyarrow_schema_compatible(
593593
self.table_metadata.schema(),
594594
provided_schema=df.schema,
@@ -787,7 +787,7 @@ def upsert(
787787

788788
from pyiceberg.io.pyarrow import _check_pyarrow_schema_compatible
789789

790-
downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
790+
downcast_ns_timestamp_to_us = Config.load().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
791791
_check_pyarrow_schema_compatible(
792792
self.table_metadata.schema(),
793793
provided_schema=df.schema,

pyiceberg/table/metadata.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ def sort_order_by_id(self, sort_order_id: int) -> SortOrder | None:
328328

329329
@field_serializer("current_snapshot_id")
330330
def serialize_current_snapshot_id(self, current_snapshot_id: int | None) -> int | None:
331-
if current_snapshot_id is None and Config().get_bool("legacy-current-snapshot-id"):
331+
if current_snapshot_id is None and Config.load().get_bool("legacy-current-snapshot-id"):
332332
return -1
333333
return current_snapshot_id
334334

pyiceberg/utils/concurrent.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ class ExecutorFactory:
2929
@staticmethod
3030
def max_workers() -> int | None:
3131
"""Return the max number of workers configured."""
32-
return Config().get_int("max-workers")
32+
return Config.load().get_int("max-workers")
3333

3434
@staticmethod
3535
def get_or_create() -> Executor:

pyiceberg/utils/config.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
# KIND, either express or implied. See the License for the
1515
# specific language governing permissions and limitations
1616
# under the License.
17+
from __future__ import annotations
18+
1719
import logging
1820
import os
1921

@@ -59,10 +61,14 @@ def _lowercase_dictionary_keys(input_dict: RecursiveDict) -> RecursiveDict:
5961
class Config:
6062
config: RecursiveDict
6163

62-
def __init__(self) -> None:
63-
config = self._from_configuration_files() or {}
64-
config = merge_config(config, self._from_environment_variables(config))
65-
self.config = FrozenDict(**config)
64+
def __init__(self, config: RecursiveDict | None = None) -> None:
65+
self.config = FrozenDict(**(config or {}))
66+
67+
@classmethod
68+
def load(cls) -> Config:
69+
config = cls._from_configuration_files() or {}
70+
config = merge_config(config, cls._from_environment_variables(config))
71+
return cls(config)
6672

6773
@staticmethod
6874
def _from_configuration_files() -> RecursiveDict | None:

0 commit comments

Comments (0)