Skip to content

Commit b88915c

Browse files
refactor: thread config through codec and hash_registry chain
Eliminate the last global config reads in the runtime path by threading connection-scoped config through the codec encode/decode and hash_registry layers. Encode path: table.py adds _config to the context dict, codecs extract it from key and pass to hash_registry/storage helpers. Decode path: expression.py passes connection to decode_attribute(), which builds a decode key with _config for codec.decode() calls. GC path: scan()/collect() extract config from schemas[0].connection and pass to list_stored_hashes/delete_path/delete_schema_path. All functions accept config=None with lazy fallback to settings.config for backward compatibility. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 2429a8a commit b88915c

File tree

14 files changed

+132
-53
lines changed

14 files changed

+132
-53
lines changed

docs/design/thread-safe-mode.md

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,14 @@ Every module that previously imported `from .settings import config` now reads c
234234
| `declare.py` | `config.jobs.add_job_metadata` | `config` param (threaded from `table.py`) |
235235
| `diagram.py` | `config.display.diagram_direction` | `self._connection._config.display.*` |
236236
| `staged_insert.py` | `config.get_store_spec()` | `self._table.connection._config.get_store_spec()` |
237+
| `hash_registry.py` | `config.get_store_spec()` in 5 functions | `config` kwarg (falls back to `settings.config`) |
238+
| `builtin_codecs/hash.py` | `config` via hash_registry | `_config` from key dict → `config` kwarg to hash_registry |
239+
| `builtin_codecs/attach.py` | `config.get("download_path")` | `_config` from key dict (falls back to `settings.config`) |
240+
| `builtin_codecs/filepath.py` | `config.get_store_spec()` | `_config` from key dict (falls back to `settings.config`) |
241+
| `builtin_codecs/schema.py` | `config.get_store_spec()` in helpers | `config` kwarg to `_build_path()`, `_get_backend()` |
242+
| `builtin_codecs/npy.py` | `config` via schema helpers | `_config` from key dict → `config` kwarg to helpers |
243+
| `builtin_codecs/object.py` | `config` via schema helpers | `_config` from key dict → `config` kwarg to helpers |
244+
| `gc.py` | `config` via hash_registry | `schemas[0].connection._config` → `config` kwarg |
237245

238246
### Functions that receive config as a parameter
239247

@@ -243,8 +251,14 @@ Some module-level functions cannot access `self.connection`. Config is threaded
243251
|----------|--------|--------------------|
244252
| `declare()` in `declare.py` | `Table.declare()` in `table.py` | `config=self.connection._config` kwarg |
245253
| `_get_job_version()` in `jobs.py` | `AutoPopulate._make_tuples()`, `Job.reserve()` | `config=self.connection._config` positional arg |
246-
247-
Both functions accept `config=None` and fall back to the global `settings.config` for backward compatibility.
254+
| `get_store_backend()` in `hash_registry.py` | codecs, gc.py | `config` kwarg from key dict or schema connection |
255+
| `get_store_subfolding()` in `hash_registry.py` | `put_hash()` | `config` kwarg chained from caller |
256+
| `put_hash()` in `hash_registry.py` | `HashCodec.encode()` | `config` kwarg from `_config` in key dict |
257+
| `get_hash()` in `hash_registry.py` | `HashCodec.decode()` | `config` kwarg from `_config` in key dict |
258+
| `delete_path()` in `hash_registry.py` | `gc.collect()` | `config` kwarg from `schemas[0].connection._config` |
259+
| `decode_attribute()` in `codecs.py` | `expression.py` fetch methods | `connection` kwarg → extracts `connection._config` |
260+
261+
All functions accept `config=None` and fall back to the global `settings.config` for backward compatibility.
248262

249263
## Implementation
250264

src/datajoint/builtin_codecs/attach.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,14 +98,15 @@ def decode(self, stored: bytes, *, key: dict | None = None) -> str:
9898
"""
9999
from pathlib import Path
100100

101-
from ..settings import config
102-
103101
# Split on first null byte
104102
null_pos = stored.index(b"\x00")
105103
filename = stored[:null_pos].decode("utf-8")
106104
contents = stored[null_pos + 1 :]
107105

108106
# Write to download path
107+
config = (key or {}).get("_config")
108+
if config is None:
109+
from ..settings import config
109110
download_path = Path(config.get("download_path", "."))
110111
download_path.mkdir(parents=True, exist_ok=True)
111112
local_path = download_path / filename

src/datajoint/builtin_codecs/filepath.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,12 @@ def encode(self, value: Any, *, key: dict | None = None, store_name: str | None
9898
"""
9999
from datetime import datetime, timezone
100100

101-
from .. import config
102101
from ..hash_registry import get_store_backend
103102

103+
config = (key or {}).get("_config")
104+
if config is None:
105+
from ..settings import config
106+
104107
path = str(value)
105108

106109
# Get store spec to check prefix configuration
@@ -137,7 +140,7 @@ def encode(self, value: Any, *, key: dict | None = None, store_name: str | None
137140
raise ValueError(f"<filepath@> must use prefix '{filepath_prefix}' (filepath_prefix). Got path: {path}")
138141

139142
# Verify file exists
140-
backend = get_store_backend(store_name)
143+
backend = get_store_backend(store_name, config=config)
141144
if not backend.exists(path):
142145
raise FileNotFoundError(f"File not found in store '{store_name or 'default'}': {path}")
143146

@@ -174,8 +177,9 @@ def decode(self, stored: dict, *, key: dict | None = None) -> Any:
174177
from ..objectref import ObjectRef
175178
from ..hash_registry import get_store_backend
176179

180+
config = (key or {}).get("_config")
177181
store_name = stored.get("store")
178-
backend = get_store_backend(store_name)
182+
backend = get_store_backend(store_name, config=config)
179183
return ObjectRef.from_json(stored, backend=backend)
180184

181185
def validate(self, value: Any) -> None:

src/datajoint/builtin_codecs/hash.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,8 @@ def encode(self, value: bytes, *, key: dict | None = None, store_name: str | Non
7676
from ..hash_registry import put_hash
7777

7878
schema_name = (key or {}).get("_schema", "unknown")
79-
return put_hash(value, schema_name=schema_name, store_name=store_name)
79+
config = (key or {}).get("_config")
80+
return put_hash(value, schema_name=schema_name, store_name=store_name, config=config)
8081

8182
def decode(self, stored: dict, *, key: dict | None = None) -> bytes:
8283
"""
@@ -96,7 +97,8 @@ def decode(self, stored: dict, *, key: dict | None = None) -> bytes:
9697
"""
9798
from ..hash_registry import get_hash
9899

99-
return get_hash(stored)
100+
config = (key or {}).get("_config")
101+
return get_hash(stored, config=config)
100102

101103
def validate(self, value: Any) -> None:
102104
"""Validate that value is bytes."""

src/datajoint/builtin_codecs/npy.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -336,17 +336,18 @@ def encode(
336336

337337
# Extract context using inherited helper
338338
schema, table, field, primary_key = self._extract_context(key)
339+
config = (key or {}).get("_config")
339340

340341
# Build schema-addressed storage path
341-
path, _ = self._build_path(schema, table, field, primary_key, ext=".npy", store_name=store_name)
342+
path, _ = self._build_path(schema, table, field, primary_key, ext=".npy", store_name=store_name, config=config)
342343

343344
# Serialize to .npy format
344345
buffer = io.BytesIO()
345346
np.save(buffer, value, allow_pickle=False)
346347
npy_bytes = buffer.getvalue()
347348

348349
# Upload to storage using inherited helper
349-
backend = self._get_backend(store_name)
350+
backend = self._get_backend(store_name, config=config)
350351
backend.put_buffer(npy_bytes, path)
351352

352353
# Return metadata (includes numpy-specific shape/dtype)
@@ -373,5 +374,6 @@ def decode(self, stored: dict, *, key: dict | None = None) -> NpyRef:
373374
NpyRef
374375
Lazy array reference with metadata access and numpy integration.
375376
"""
376-
backend = self._get_backend(stored.get("store"))
377+
config = (key or {}).get("_config")
378+
backend = self._get_backend(stored.get("store"), config=config)
377379
return NpyRef(stored, backend)

src/datajoint/builtin_codecs/object.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ def encode(
104104

105105
# Extract context using inherited helper
106106
schema, table, field, primary_key = self._extract_context(key)
107+
config = (key or {}).get("_config")
107108

108109
# Check for pre-computed metadata (from staged insert)
109110
if isinstance(value, dict) and "path" in value:
@@ -145,10 +146,10 @@ def encode(
145146
raise TypeError(f"<object> expects bytes or path, got {type(value).__name__}")
146147

147148
# Build storage path using inherited helper
148-
path, token = self._build_path(schema, table, field, primary_key, ext=ext, store_name=store_name)
149+
path, token = self._build_path(schema, table, field, primary_key, ext=ext, store_name=store_name, config=config)
149150

150151
# Get storage backend using inherited helper
151-
backend = self._get_backend(store_name)
152+
backend = self._get_backend(store_name, config=config)
152153

153154
# Upload content
154155
if is_dir:
@@ -192,7 +193,8 @@ def decode(self, stored: dict, *, key: dict | None = None) -> Any:
192193
"""
193194
from ..objectref import ObjectRef
194195

195-
backend = self._get_backend(stored.get("store"))
196+
config = (key or {}).get("_config")
197+
backend = self._get_backend(stored.get("store"), config=config)
196198
return ObjectRef.from_json(stored, backend=backend)
197199

198200
def validate(self, value: Any) -> None:

src/datajoint/builtin_codecs/schema.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ def _build_path(
108108
primary_key: dict,
109109
ext: str | None = None,
110110
store_name: str | None = None,
111+
config=None,
111112
) -> tuple[str, str]:
112113
"""
113114
Build schema-addressed storage path.
@@ -131,6 +132,8 @@ def _build_path(
131132
File extension (e.g., ".npy", ".zarr").
132133
store_name : str, optional
133134
Store name for retrieving partition configuration.
135+
config : Config, optional
136+
Config instance. If None, falls back to global settings.config.
134137
135138
Returns
136139
-------
@@ -139,7 +142,9 @@ def _build_path(
139142
is a unique identifier.
140143
"""
141144
from ..storage import build_object_path
142-
from .. import config
145+
146+
if config is None:
147+
from ..settings import config
143148

144149
# Get store configuration for partition_pattern and token_length
145150
spec = config.get_store_spec(store_name)
@@ -156,14 +161,16 @@ def _build_path(
156161
token_length=token_length,
157162
)
158163

159-
def _get_backend(self, store_name: str | None = None):
164+
def _get_backend(self, store_name: str | None = None, config=None):
160165
"""
161166
Get storage backend by name.
162167
163168
Parameters
164169
----------
165170
store_name : str, optional
166171
Store name. If None, returns default store.
172+
config : Config, optional
173+
Config instance. If None, falls back to global settings.config.
167174
168175
Returns
169176
-------
@@ -172,4 +179,4 @@ def _get_backend(self, store_name: str | None = None):
172179
"""
173180
from ..hash_registry import get_store_backend
174181

175-
return get_store_backend(store_name)
182+
return get_store_backend(store_name, config=config)

src/datajoint/codecs.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,7 @@ def lookup_codec(codec_spec: str) -> tuple[Codec, str | None]:
515515
# =============================================================================
516516

517517

518-
def decode_attribute(attr, data, squeeze: bool = False):
518+
def decode_attribute(attr, data, squeeze: bool = False, connection=None):
519519
"""
520520
Decode raw database value using attribute's codec or native type handling.
521521
@@ -528,6 +528,8 @@ def decode_attribute(attr, data, squeeze: bool = False):
528528
attr: Attribute from the table's heading.
529529
data: Raw value fetched from the database.
530530
squeeze: If True, remove singleton dimensions from numpy arrays.
531+
connection: Connection instance for config access. If provided,
532+
``connection._config`` is passed to codecs via the key dict.
531533
532534
Returns:
533535
Decoded Python value.
@@ -560,9 +562,14 @@ def decode_attribute(attr, data, squeeze: bool = False):
560562
elif final_dtype.lower() == "binary(16)":
561563
data = uuid_module.UUID(bytes=data)
562564

565+
# Build decode key with config if connection is available
566+
decode_key = None
567+
if connection is not None:
568+
decode_key = {"_config": connection._config}
569+
563570
# Apply decoders in reverse order: innermost first, then outermost
564571
for codec in reversed(type_chain):
565-
data = codec.decode(data, key=None)
572+
data = codec.decode(data, key=decode_key)
566573

567574
# Squeeze arrays if requested
568575
if squeeze and isinstance(data, np.ndarray):

src/datajoint/expression.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -715,7 +715,7 @@ def fetch(
715715
import warnings
716716

717717
warnings.warn(
718-
"fetch() is deprecated in DataJoint 2.0. " "Use to_dicts(), to_pandas(), to_arrays(), or keys() instead.",
718+
"fetch() is deprecated in DataJoint 2.0. Use to_dicts(), to_pandas(), to_arrays(), or keys() instead.",
719719
DeprecationWarning,
720720
stacklevel=2,
721721
)
@@ -817,7 +817,10 @@ def fetch1(self, *attrs, squeeze=False):
817817
row = cursor.fetchone()
818818
if not row or cursor.fetchone():
819819
raise DataJointError("fetch1 requires exactly one tuple in the input set.")
820-
return {name: decode_attribute(heading[name], row[name], squeeze=squeeze) for name in heading.names}
820+
return {
821+
name: decode_attribute(heading[name], row[name], squeeze=squeeze, connection=self.connection)
822+
for name in heading.names
823+
}
821824
else:
822825
# Handle "KEY" specially - it means primary key columns
823826
def is_key(attr):
@@ -892,7 +895,10 @@ def to_dicts(self, order_by=None, limit=None, offset=None, squeeze=False):
892895
expr = self._apply_top(order_by, limit, offset)
893896
cursor = expr.cursor(as_dict=True)
894897
heading = expr.heading
895-
return [{name: decode_attribute(heading[name], row[name], squeeze) for name in heading.names} for row in cursor]
898+
return [
899+
{name: decode_attribute(heading[name], row[name], squeeze, connection=expr.connection) for name in heading.names}
900+
for row in cursor
901+
]
896902

897903
def to_pandas(self, order_by=None, limit=None, offset=None, squeeze=False):
898904
"""
@@ -1063,7 +1069,7 @@ def to_arrays(self, *attrs, include_key=False, order_by=None, limit=None, offset
10631069
return result_arrays[0] if len(attrs) == 1 else tuple(result_arrays)
10641070
else:
10651071
# Fetch all columns as structured array
1066-
get = partial(decode_attribute, squeeze=squeeze)
1072+
get = partial(decode_attribute, squeeze=squeeze, connection=expr.connection)
10671073
cursor = expr.cursor(as_dict=False)
10681074
rows = list(cursor.fetchall())
10691075

@@ -1217,7 +1223,10 @@ def __iter__(self):
12171223
cursor = self.cursor(as_dict=True)
12181224
heading = self.heading
12191225
for row in cursor:
1220-
yield {name: decode_attribute(heading[name], row[name], squeeze=False) for name in heading.names}
1226+
yield {
1227+
name: decode_attribute(heading[name], row[name], squeeze=False, connection=self.connection)
1228+
for name in heading.names
1229+
}
12211230

12221231
def cursor(self, as_dict=False):
12231232
"""

0 commit comments

Comments
 (0)