Skip to content

Commit def02b6

Browse files
authored
Merge branch 'main' into feat/scale-offset-cast-value
2 parents 0d6e48d + 85dfbcb commit def02b6

File tree

4 files changed

+163
-44
lines changed

4 files changed

+163
-44
lines changed

.github/workflows/docs.yml

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
# CI workflow: build the MkDocs site and fail on unlinked type annotations.
# Runs on pushes and pull requests targeting main, plus manual dispatch.
name: Docs

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  workflow_dispatch:

# Read-only token: this job never writes to the repository.
permissions:
  contents: read

# Cancel an in-flight run when a newer commit lands on the same ref.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  docs:
    name: Check docs
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false
      - uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6.1.0
      - run: uv sync --group docs
      - run: uv run mkdocs build
        env:
          DISABLE_MKDOCS_2_WARNING: "true"
          NO_MKDOCS_2_WARNING: "true"
      - run: uv run python ci/check_unlinked_types.py

ci/check_unlinked_types.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
"""Check for unlinked type annotations in built documentation.
2+
3+
mkdocstrings renders resolved types as <a href="..."> links and unresolved
4+
types as <span title="fully.qualified.Name">Name</span> without an anchor.
5+
This script finds all such unlinked types in the built HTML and reports them.
6+
7+
Usage:
8+
python ci/check_unlinked_types.py [site_dir]
9+
10+
Raises ValueError if unlinked types are found.
11+
"""
12+
13+
from __future__ import annotations
14+
15+
import re
16+
import sys
17+
from pathlib import Path
18+
19+
# griffe/mkdocstrings renders an unresolved cross-reference as a bare titled
# span (resolved ones become <a href="..."> links instead):
#   <span class="n"><span title="fully.qualified.Name">Name</span></span>
UNLINKED_PATTERN = re.compile(
    r'<span class="n"><span title="(?P<qualname>[^"]+)">(?P<name>[^<]+)</span></span>'
)

# Qualified names matching any of these patterns are deliberately not reported.
EXCLUDE_PATTERNS = [
    re.compile(r"\[.+\]$"),  # TypeVars / type parameters (single brackets like Foo[T])
    re.compile(r"\("),  # dataclass field / namedtuple field references (contain parens)
    re.compile(r"\._"),  # private names
    re.compile(r"\.__\w+__$"),  # dunder attributes
    re.compile(r"^zarr\.testing\."),  # testing utilities
    # third-party types (hypothesis, pytest, etc.)
    re.compile(r"^(hypothesis|pytest|typing_extensions|builtins|dataclasses)\."),
]


def should_exclude(qualname: str) -> bool:
    """Return True if *qualname* matches any pattern in EXCLUDE_PATTERNS."""
    return any(pattern.search(qualname) for pattern in EXCLUDE_PATTERNS)
45+
def find_unlinked_types(site_dir: Path) -> dict[str, set[str]]:
    """Scan the built HTML under ``site_dir``/api for unlinked type references.

    Returns a dict mapping each fully-qualified type name to the set of pages
    (paths relative to ``site_dir``) where it appears un-linked.

    Raises FileNotFoundError when the site has not been built yet.
    """
    api_dir = site_dir / "api"
    if not api_dir.exists():
        raise FileNotFoundError(f"{api_dir} does not exist. Run 'mkdocs build' first.")

    results: dict[str, set[str]] = {}
    for page in api_dir.rglob("*.html"):
        # errors="replace": tolerate any stray non-decodable bytes in the HTML.
        html = page.read_text(errors="replace")
        page_name = str(page.relative_to(site_dir))
        for hit in UNLINKED_PATTERN.finditer(html):
            qualname = hit.group("qualname")
            if should_exclude(qualname):
                continue
            results.setdefault(qualname, set()).add(page_name)

    return results
def main() -> None:
    """CLI entry point: report unlinked types found under the site directory.

    The site directory defaults to ``site`` and may be overridden by the first
    command-line argument. Raises ValueError with a full report when unlinked
    types exist, so CI fails with the details in the traceback.
    """
    target = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("site")
    unlinked = find_unlinked_types(target)

    if not unlinked:
        print("No unlinked types found.")
        return

    report_lines = [f"Found {len(unlinked)} unlinked types:\n"]
    for qualname, pages in sorted(unlinked.items()):
        report_lines.append(f" {qualname}")
        report_lines.extend(f" - {page}" for page in sorted(pages))

    page_count = len({page for pages in unlinked.values() for page in pages})
    report_lines.append(f"\nTotal: {len(unlinked)} unlinked types across {page_count} pages")
    raise ValueError("\n".join(report_lines))


if __name__ == "__main__":
    main()

src/zarr/registry.py

Lines changed: 43 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,11 @@ def register(self, cls: type[T], qualname: str | None = None) -> None:
5757
self[qualname] = cls
5858

5959

60-
__codec_registries: dict[str, Registry[Codec]] = defaultdict(Registry)
61-
__pipeline_registry: Registry[CodecPipeline] = Registry()
62-
__buffer_registry: Registry[Buffer] = Registry()
63-
__ndbuffer_registry: Registry[NDBuffer] = Registry()
64-
__chunk_key_encoding_registry: Registry[ChunkKeyEncoding] = Registry()
60+
_codec_registries: dict[str, Registry[Codec]] = defaultdict(Registry)
61+
_pipeline_registry: Registry[CodecPipeline] = Registry()
62+
_buffer_registry: Registry[Buffer] = Registry()
63+
_ndbuffer_registry: Registry[NDBuffer] = Registry()
64+
_chunk_key_encoding_registry: Registry[ChunkKeyEncoding] = Registry()
6565

6666
"""
6767
The registry module is responsible for managing implementations of codecs,
@@ -93,37 +93,37 @@ def _collect_entrypoints() -> list[Registry[Any]]:
9393
"""
9494
entry_points = get_entry_points()
9595

96-
__buffer_registry.lazy_load_list.extend(entry_points.select(group="zarr.buffer"))
97-
__buffer_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="buffer"))
98-
__ndbuffer_registry.lazy_load_list.extend(entry_points.select(group="zarr.ndbuffer"))
99-
__ndbuffer_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="ndbuffer"))
96+
_buffer_registry.lazy_load_list.extend(entry_points.select(group="zarr.buffer"))
97+
_buffer_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="buffer"))
98+
_ndbuffer_registry.lazy_load_list.extend(entry_points.select(group="zarr.ndbuffer"))
99+
_ndbuffer_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="ndbuffer"))
100100

101101
data_type_registry._lazy_load_list.extend(entry_points.select(group="zarr.data_type"))
102102
data_type_registry._lazy_load_list.extend(entry_points.select(group="zarr", name="data_type"))
103103

104-
__chunk_key_encoding_registry.lazy_load_list.extend(
104+
_chunk_key_encoding_registry.lazy_load_list.extend(
105105
entry_points.select(group="zarr.chunk_key_encoding")
106106
)
107-
__chunk_key_encoding_registry.lazy_load_list.extend(
107+
_chunk_key_encoding_registry.lazy_load_list.extend(
108108
entry_points.select(group="zarr", name="chunk_key_encoding")
109109
)
110110

111-
__pipeline_registry.lazy_load_list.extend(entry_points.select(group="zarr.codec_pipeline"))
112-
__pipeline_registry.lazy_load_list.extend(
111+
_pipeline_registry.lazy_load_list.extend(entry_points.select(group="zarr.codec_pipeline"))
112+
_pipeline_registry.lazy_load_list.extend(
113113
entry_points.select(group="zarr", name="codec_pipeline")
114114
)
115115
for e in entry_points.select(group="zarr.codecs"):
116-
__codec_registries[e.name].lazy_load_list.append(e)
116+
_codec_registries[e.name].lazy_load_list.append(e)
117117
for group in entry_points.groups:
118118
if group.startswith("zarr.codecs."):
119119
codec_name = group.split(".")[2]
120-
__codec_registries[codec_name].lazy_load_list.extend(entry_points.select(group=group))
120+
_codec_registries[codec_name].lazy_load_list.extend(entry_points.select(group=group))
121121
return [
122-
*__codec_registries.values(),
123-
__pipeline_registry,
124-
__buffer_registry,
125-
__ndbuffer_registry,
126-
__chunk_key_encoding_registry,
122+
*_codec_registries.values(),
123+
_pipeline_registry,
124+
_buffer_registry,
125+
_ndbuffer_registry,
126+
_chunk_key_encoding_registry,
127127
]
128128

129129

@@ -137,36 +137,36 @@ def fully_qualified_name(cls: type) -> str:
137137

138138

139139
def register_codec(key: str, codec_cls: type[Codec], *, qualname: str | None = None) -> None:
140-
if key not in __codec_registries:
141-
__codec_registries[key] = Registry()
142-
__codec_registries[key].register(codec_cls, qualname=qualname)
140+
if key not in _codec_registries:
141+
_codec_registries[key] = Registry()
142+
_codec_registries[key].register(codec_cls, qualname=qualname)
143143

144144

145145
def register_pipeline(pipe_cls: type[CodecPipeline]) -> None:
146-
__pipeline_registry.register(pipe_cls)
146+
_pipeline_registry.register(pipe_cls)
147147

148148

149149
def register_ndbuffer(cls: type[NDBuffer], qualname: str | None = None) -> None:
150-
__ndbuffer_registry.register(cls, qualname)
150+
_ndbuffer_registry.register(cls, qualname)
151151

152152

153153
def register_buffer(cls: type[Buffer], qualname: str | None = None) -> None:
154-
__buffer_registry.register(cls, qualname)
154+
_buffer_registry.register(cls, qualname)
155155

156156

157157
def register_chunk_key_encoding(key: str, cls: type) -> None:
158-
__chunk_key_encoding_registry.register(cls, key)
158+
_chunk_key_encoding_registry.register(cls, key)
159159

160160

161161
def get_codec_class(key: str, reload_config: bool = False) -> type[Codec]:
162162
if reload_config:
163163
_reload_config()
164164

165-
if key in __codec_registries:
165+
if key in _codec_registries:
166166
# logger.debug("Auto loading codec '%s' from entrypoint", codec_id)
167-
__codec_registries[key].lazy_load()
167+
_codec_registries[key].lazy_load()
168168

169-
codec_classes = __codec_registries[key]
169+
codec_classes = _codec_registries[key]
170170
if not codec_classes:
171171
raise KeyError(key)
172172
config_entry = config.get("codecs", {}).get(key)
@@ -257,50 +257,50 @@ def _parse_array_array_codec(data: dict[str, JSON] | Codec) -> ArrayArrayCodec:
257257
def get_pipeline_class(reload_config: bool = False) -> type[CodecPipeline]:
258258
if reload_config:
259259
_reload_config()
260-
__pipeline_registry.lazy_load()
260+
_pipeline_registry.lazy_load()
261261
path = config.get("codec_pipeline.path")
262-
pipeline_class = __pipeline_registry.get(path)
262+
pipeline_class = _pipeline_registry.get(path)
263263
if pipeline_class:
264264
return pipeline_class
265265
raise BadConfigError(
266-
f"Pipeline class '{path}' not found in registered pipelines: {list(__pipeline_registry)}."
266+
f"Pipeline class '{path}' not found in registered pipelines: {list(_pipeline_registry)}."
267267
)
268268

269269

270270
def get_buffer_class(reload_config: bool = False) -> type[Buffer]:
271271
if reload_config:
272272
_reload_config()
273-
__buffer_registry.lazy_load()
273+
_buffer_registry.lazy_load()
274274

275275
path = config.get("buffer")
276-
buffer_class = __buffer_registry.get(path)
276+
buffer_class = _buffer_registry.get(path)
277277
if buffer_class:
278278
return buffer_class
279279
raise BadConfigError(
280-
f"Buffer class '{path}' not found in registered buffers: {list(__buffer_registry)}."
280+
f"Buffer class '{path}' not found in registered buffers: {list(_buffer_registry)}."
281281
)
282282

283283

284284
def get_ndbuffer_class(reload_config: bool = False) -> type[NDBuffer]:
285285
if reload_config:
286286
_reload_config()
287-
__ndbuffer_registry.lazy_load()
287+
_ndbuffer_registry.lazy_load()
288288
path = config.get("ndbuffer")
289-
ndbuffer_class = __ndbuffer_registry.get(path)
289+
ndbuffer_class = _ndbuffer_registry.get(path)
290290
if ndbuffer_class:
291291
return ndbuffer_class
292292
raise BadConfigError(
293-
f"NDBuffer class '{path}' not found in registered buffers: {list(__ndbuffer_registry)}."
293+
f"NDBuffer class '{path}' not found in registered buffers: {list(_ndbuffer_registry)}."
294294
)
295295

296296

297297
def get_chunk_key_encoding_class(key: str) -> type[ChunkKeyEncoding]:
298-
__chunk_key_encoding_registry.lazy_load(use_entrypoint_name=True)
299-
if key not in __chunk_key_encoding_registry:
298+
_chunk_key_encoding_registry.lazy_load(use_entrypoint_name=True)
299+
if key not in _chunk_key_encoding_registry:
300300
raise KeyError(
301-
f"Chunk key encoding '{key}' not found in registered chunk key encodings: {list(__chunk_key_encoding_registry)}."
301+
f"Chunk key encoding '{key}' not found in registered chunk key encodings: {list(_chunk_key_encoding_registry)}."
302302
)
303-
return __chunk_key_encoding_registry[key]
303+
return _chunk_key_encoding_registry[key]
304304

305305

306306
_collect_entrypoints()

tests/test_metadata/test_v3.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,7 @@ def test_parse_codecs_unknown_codec_raises(monkeypatch: pytest.MonkeyPatch) -> N
341341
from zarr.registry import Registry
342342

343343
# to make sure the codec is always unknown (not sure if that's necessary)
344-
monkeypatch.setattr(zarr.registry, "__codec_registries", defaultdict(Registry))
344+
monkeypatch.setattr(zarr.registry, "_codec_registries", defaultdict(Registry))
345345

346346
codecs = [{"name": "unknown"}]
347347
with pytest.raises(UnknownCodecError):

0 commit comments

Comments
 (0)