Skip to content

Commit cb2e98a

Browse files
committed
tighten plugin catalog edge cases
1 parent 773c6ba commit cb2e98a

2 files changed

Lines changed: 70 additions & 16 deletions

File tree

packages/data-designer/src/data_designer/cli/repositories/plugin_catalog_repository.py

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,14 @@
3232
from data_designer.config.utils.io_helpers import load_config_file, save_config_file
3333

3434

35+
class _PluginCatalogSourceUnavailableError(PluginCatalogError):
36+
pass
37+
38+
39+
class _PluginCatalogContentError(PluginCatalogError):
40+
pass
41+
42+
3543
class PluginCatalogRepository(ConfigRepository[PluginCatalogRegistry]):
3644
"""Repository for plugin catalog aliases and cached catalog payloads."""
3745

@@ -139,7 +147,7 @@ def load_catalog(self, alias: str | None = None, *, refresh: bool = False) -> Pl
139147

140148
try:
141149
payload = self._fetch_catalog_payload(catalog_config.url)
142-
except (PluginCatalogError, OSError, ValueError):
150+
except _PluginCatalogSourceUnavailableError:
143151
if not refresh:
144152
cached_catalog = self._load_cached_catalog(catalog_config, require_fresh=False)
145153
if cached_catalog is not None:
@@ -257,7 +265,7 @@ def _normalize_catalog_url(url: str) -> str:
257265
segments = [segment for segment in parsed.path.split("/") if segment]
258266

259267
if hostname in {"github.com", "www.github.com"} and len(segments) >= 2:
260-
owner, repo = segments[0], segments[1]
268+
owner, repo = segments[0], segments[1].removesuffix(".git")
261269
if len(segments) == 2:
262270
return f"https://raw.githubusercontent.com/{owner}/{repo}/main/catalog/plugins.json"
263271
if len(segments) >= 5 and segments[2] == "blob":
@@ -289,21 +297,26 @@ def _catalog_plugins_url_path(catalog_root: str) -> str:
289297

290298
def _fetch_local_catalog(location: str) -> dict[str, object]:
291299
path = Path(location).expanduser()
292-
if not path.exists():
293-
raise PluginCatalogError(f"Plugin catalog file not found: {path}")
294-
if path.stat().st_size > MAX_PLUGIN_CATALOG_SIZE_BYTES:
295-
raise PluginCatalogError(
296-
f"Plugin catalog at {path} exceeds maximum size of {MAX_PLUGIN_CATALOG_SIZE_BYTES} bytes"
297-
)
300+
try:
301+
if not path.exists():
302+
raise _PluginCatalogSourceUnavailableError(f"Plugin catalog file not found: {path}")
303+
if path.stat().st_size > MAX_PLUGIN_CATALOG_SIZE_BYTES:
304+
raise _PluginCatalogContentError(
305+
f"Plugin catalog at {path} exceeds maximum size of {MAX_PLUGIN_CATALOG_SIZE_BYTES} bytes"
306+
)
307+
except OSError as e:
308+
raise _PluginCatalogSourceUnavailableError(f"Failed to read plugin catalog file {path}: {e}") from e
298309

299310
try:
300311
with open(path) as f:
301312
payload = json.load(f)
313+
except OSError as e:
314+
raise _PluginCatalogSourceUnavailableError(f"Failed to read plugin catalog file {path}: {e}") from e
302315
except json.JSONDecodeError as e:
303-
raise PluginCatalogError(f"Failed to parse plugin catalog JSON at {path}: {e}") from e
316+
raise _PluginCatalogContentError(f"Failed to parse plugin catalog JSON at {path}: {e}") from e
304317

305318
if not isinstance(payload, dict):
306-
raise PluginCatalogError(f"Plugin catalog at {path} must be a JSON object")
319+
raise _PluginCatalogContentError(f"Plugin catalog at {path} must be a JSON object")
307320
return payload
308321

309322

@@ -313,27 +326,29 @@ def _fetch_remote_catalog(url: str) -> dict[str, object]:
313326
with urlopen(request, timeout=10) as response:
314327
status = getattr(response, "status", 200)
315328
if isinstance(status, int) and status >= 400:
316-
raise PluginCatalogError(f"Failed to fetch plugin catalog {url!r}: HTTP {status}")
329+
raise _PluginCatalogSourceUnavailableError(f"Failed to fetch plugin catalog {url!r}: HTTP {status}")
317330
# Read one byte past the limit so oversized chunked responses are
318331
# rejected without keeping the full response body in memory.
319332
content = response.read(MAX_PLUGIN_CATALOG_SIZE_BYTES + 1)
320333
except HTTPError as e:
321-
raise PluginCatalogError(f"Failed to fetch plugin catalog {url!r}: HTTP {e.code}") from e
334+
raise _PluginCatalogSourceUnavailableError(f"Failed to fetch plugin catalog {url!r}: HTTP {e.code}") from e
322335
except URLError as e:
323-
raise PluginCatalogError(f"Failed to fetch plugin catalog {url!r}: {e.reason}") from e
336+
raise _PluginCatalogSourceUnavailableError(f"Failed to fetch plugin catalog {url!r}: {e.reason}") from e
337+
except OSError as e:
338+
raise _PluginCatalogSourceUnavailableError(f"Failed to read plugin catalog {url!r}: {e}") from e
324339

325340
if len(content) > MAX_PLUGIN_CATALOG_SIZE_BYTES:
326-
raise PluginCatalogError(
341+
raise _PluginCatalogContentError(
327342
f"Plugin catalog at {url!r} exceeds maximum size of {MAX_PLUGIN_CATALOG_SIZE_BYTES} bytes"
328343
)
329344

330345
try:
331346
payload = json.loads(content.decode("utf-8"))
332347
except (UnicodeDecodeError, json.JSONDecodeError) as e:
333-
raise PluginCatalogError(f"Failed to parse plugin catalog JSON at {url!r}: {e}") from e
348+
raise _PluginCatalogContentError(f"Failed to parse plugin catalog JSON at {url!r}: {e}") from e
334349

335350
if not isinstance(payload, dict):
336-
raise PluginCatalogError(f"Plugin catalog at {url!r} must be a JSON object")
351+
raise _PluginCatalogContentError(f"Plugin catalog at {url!r} must be a JSON object")
337352
return payload
338353

339354

packages/data-designer/tests/cli/repositories/test_plugin_catalog_repository.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,14 @@ def test_add_catalog_normalizes_github_repository_url(tmp_path: Path) -> None:
5050
assert repository.get_catalog("research") == catalog
5151

5252

53+
def test_add_catalog_normalizes_github_repository_url_with_git_suffix(tmp_path: Path) -> None:
54+
repository = PluginCatalogRepository(tmp_path)
55+
56+
catalog = repository.add_catalog("research", "https://github.com/acme/dd-plugins.git")
57+
58+
assert catalog.url == "https://raw.githubusercontent.com/acme/dd-plugins/main/catalog/plugins.json"
59+
60+
5361
def test_add_catalog_persists_only_public_catalog_fields(tmp_path: Path) -> None:
5462
repository = PluginCatalogRepository(tmp_path)
5563

@@ -69,6 +77,14 @@ def test_add_catalog_normalizes_github_tree_url_with_subdirectory(tmp_path: Path
6977
assert catalog.url == "https://raw.githubusercontent.com/acme/dd-plugins/main/custom-catalog/catalog/plugins.json"
7078

7179

80+
def test_add_catalog_normalizes_github_tree_url_with_git_suffix(tmp_path: Path) -> None:
81+
repository = PluginCatalogRepository(tmp_path)
82+
83+
catalog = repository.add_catalog("research", "https://github.com/acme/dd-plugins.git/tree/main/custom-catalog")
84+
85+
assert catalog.url == "https://raw.githubusercontent.com/acme/dd-plugins/main/custom-catalog/catalog/plugins.json"
86+
87+
7288
def test_add_catalog_normalizes_github_tree_url_ending_with_catalog(tmp_path: Path) -> None:
7389
repository = PluginCatalogRepository(tmp_path)
7490

@@ -77,6 +93,17 @@ def test_add_catalog_normalizes_github_tree_url_ending_with_catalog(tmp_path: Pa
7793
assert catalog.url == "https://raw.githubusercontent.com/acme/dd-plugins/main/catalog/plugins.json"
7894

7995

96+
def test_add_catalog_normalizes_github_blob_url_with_git_suffix(tmp_path: Path) -> None:
97+
repository = PluginCatalogRepository(tmp_path)
98+
99+
catalog = repository.add_catalog(
100+
"research",
101+
"https://github.com/acme/dd-plugins.git/blob/main/catalog/plugins.json",
102+
)
103+
104+
assert catalog.url == "https://raw.githubusercontent.com/acme/dd-plugins/main/catalog/plugins.json"
105+
106+
80107
def test_catalog_aliases_are_case_insensitive(tmp_path: Path) -> None:
81108
repository = PluginCatalogRepository(tmp_path)
82109

@@ -161,6 +188,18 @@ def test_load_catalog_does_not_fall_back_to_stale_cache_when_fresh_catalog_is_in
161188
repository.load_catalog("local")
162189

163190

191+
def test_load_catalog_does_not_fall_back_to_stale_cache_when_source_json_is_malformed(tmp_path: Path) -> None:
192+
catalog_path = _write_catalog(tmp_path, plugin_name="cached-transform")
193+
repository = PluginCatalogRepository(tmp_path)
194+
repository.add_catalog("local", str(catalog_path), cache_ttl_seconds=0)
195+
196+
repository.load_catalog("local")
197+
catalog_path.write_text("{")
198+
199+
with pytest.raises(PluginCatalogError, match="Failed to parse plugin catalog JSON"):
200+
repository.load_catalog("local")
201+
202+
164203
def test_load_catalog_with_zero_cache_ttl_refreshes_source(tmp_path: Path) -> None:
165204
catalog_path = _write_catalog(tmp_path, plugin_name="text-transform")
166205
repository = PluginCatalogRepository(tmp_path)

0 commit comments

Comments
 (0)