Skip to content

Commit f512b8b

Browse files
lselvar, claude, mnriem, Copilot
authored
fix: resolve GitHub release asset API URL for private repo preset and workflow downloads (#2855)
* fix: resolve GitHub release asset API URL for private repo preset and workflow downloads - Add shared `resolve_github_release_asset_api_url` utility to `_github_http.py` for reuse across preset and workflow download paths - Apply the same private-repo fix from PR #2792 (extensions) to: - `PresetCatalog.download_pack` — ZIP downloads via catalog `download_url` - `preset add --from <url>` — ZIP downloads from a direct URL - `workflow add <url>` — workflow YAML downloads from a direct URL - `workflow add <id>` (catalog) — workflow YAML downloads via catalog `url` - For browser release URLs (`github.com/…/releases/download/…`), the asset is resolved via the GitHub REST API and downloaded with `Accept: application/octet-stream` - Direct REST API asset URLs (`api.github.com/…/releases/assets/<id>`) are downloaded directly with `Accept: application/octet-stream` - Auth is preserved end-to-end through the existing `open_url` infrastructure - Update `test_download_pack_sends_auth_header` and add `test_download_pack_accepts_direct_github_rest_asset_url` to cover both paths Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * fix: URL-encode tag in release API URL to handle special characters Encode the tag as a path segment (using quote with safe='') when building the releases/tags/<tag> API URL. This prevents malformed URLs when tags contain reserved characters like '/' or '#'. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * test: add CLI-level tests for preset add --from GitHub release URL resolution Adds regression tests covering: - resolve_github_release_asset_api_url unit tests (passthrough, resolution, network error, URL encoding of special chars in tags) - CLI-level 'preset add --from <github-release-url>' end-to-end flow - CLI-level 'preset add --from <api-asset-url>' direct passthrough Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * refactor: deduplicate release URL resolution; fix test issues - ExtensionCatalog._resolve_github_release_asset_api_url now delegates to the shared helper in _github_http.py (also gains URL-encoding fix) - Remove unused 'io' import from test_github_http.py - Remove duplicate 'provides' dict keys accidentally added to test_presets.py Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * fix: align resolver timeout with download timeout; add workflow CLI tests - Pass timeout=30 to resolve_github_release_asset_api_url in both workflow add paths so worst-case latency matches the download timeout - Add CLI-level regression tests for 'workflow add <url>' covering browser URL resolution and direct API asset URL passthrough Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * fix: remove unused urllib.request import; add catalog workflow test - Remove unused 'import urllib.request' in preset add --from path - Add CLI test for catalog-based 'workflow add <id>' with GitHub release URL resolution Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * style: remove unused MagicMock imports from tests Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --------- Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com> Co-authored-by: Manfred Riem <mnriem@users.noreply.github.com> Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 19c2657 commit f512b8b

7 files changed

Lines changed: 613 additions & 52 deletions

File tree

src/specify_cli/__init__.py

Lines changed: 25 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -702,16 +702,22 @@ def preset_add(
702702
raise typer.Exit(1)
703703

704704
console.print(f"Installing preset from [cyan]{from_url}[/cyan]...")
705-
import urllib.request
706705
import urllib.error
707706
import tempfile
708707

709708
with tempfile.TemporaryDirectory() as tmpdir:
710709
zip_path = Path(tmpdir) / "preset.zip"
711710
try:
712711
from specify_cli.authentication.http import open_url as _open_url
712+
from specify_cli._github_http import resolve_github_release_asset_api_url
713713

714-
with _open_url(from_url, timeout=60) as response:
714+
_preset_extra_headers = None
715+
_resolved_from_url = resolve_github_release_asset_api_url(from_url, _open_url)
716+
if _resolved_from_url:
717+
from_url = _resolved_from_url
718+
_preset_extra_headers = {"Accept": "application/octet-stream"}
719+
720+
with _open_url(from_url, timeout=60, extra_headers=_preset_extra_headers) as response:
715721
zip_path.write_bytes(response.read())
716722
except urllib.error.URLError as e:
717723
console.print(f"[red]Error:[/red] Failed to download: {e}")
@@ -3065,9 +3071,17 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None:
30653071
console.print("[red]Error:[/red] Only HTTPS URLs are allowed, except HTTP for localhost.")
30663072
raise typer.Exit(1)
30673073

3074+
from specify_cli._github_http import resolve_github_release_asset_api_url as _resolve_gh_asset
3075+
3076+
_wf_url_extra_headers = None
3077+
_resolved_wf_url = _resolve_gh_asset(source, _open_url, timeout=30)
3078+
if _resolved_wf_url:
3079+
source = _resolved_wf_url
3080+
_wf_url_extra_headers = {"Accept": "application/octet-stream"}
3081+
30683082
import tempfile
30693083
try:
3070-
with _open_url(source, timeout=30) as resp:
3084+
with _open_url(source, timeout=30, extra_headers=_wf_url_extra_headers) as resp:
30713085
final_url = resp.geturl()
30723086
final_parsed = urlparse(final_url)
30733087
final_host = final_parsed.hostname or ""
@@ -3164,9 +3178,16 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None:
31643178

31653179
try:
31663180
from specify_cli.authentication.http import open_url as _open_url
3181+
from specify_cli._github_http import resolve_github_release_asset_api_url as _resolve_gh_asset
3182+
3183+
_wf_cat_extra_headers = None
3184+
_resolved_workflow_url = _resolve_gh_asset(workflow_url, _open_url, timeout=30)
3185+
if _resolved_workflow_url:
3186+
workflow_url = _resolved_workflow_url
3187+
_wf_cat_extra_headers = {"Accept": "application/octet-stream"}
31673188

31683189
workflow_dir.mkdir(parents=True, exist_ok=True)
3169-
with _open_url(workflow_url, timeout=30) as response:
3190+
with _open_url(workflow_url, timeout=30, extra_headers=_wf_cat_extra_headers) as response:
31703191
# Validate final URL after redirects
31713192
final_url = response.geturl()
31723193
final_parsed = urlparse(final_url)

src/specify_cli/_github_http.py

Lines changed: 75 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,8 @@
88

99
import os
1010
import urllib.request
11-
from typing import Dict
12-
from urllib.parse import urlparse
11+
from typing import Callable, Dict, Optional
12+
from urllib.parse import quote, unquote, urlparse
1313

1414
# GitHub-owned hostnames that should receive the Authorization header.
1515
# Includes codeload.github.com because GitHub archive URL downloads
@@ -76,6 +76,79 @@ def redirect_request(self, req, fp, code, msg, headers, newurl):
7676
return new_req
7777

7878

79+
def resolve_github_release_asset_api_url(
    download_url: str,
    open_url_fn: Callable,
    timeout: int = 60,
) -> Optional[str]:
    """Resolve a GitHub browser release URL to its REST API asset URL.

    For private or SSO-protected repositories, browser release download
    URLs (``https://github.com/<owner>/<repo>/releases/download/<tag>/<asset>``)
    redirect to an HTML/SSO page instead of delivering the file.  This
    helper resolves such a URL to the matching GitHub REST API asset URL
    (``https://api.github.com/repos/…/releases/assets/<id>``), which can
    then be downloaded with ``Accept: application/octet-stream`` and an
    auth token to retrieve the actual file payload.

    If *download_url* is already a REST API asset URL, it is returned
    as-is.  Non-GitHub URLs and GitHub URLs that are not release-download
    URLs return ``None``.  If the API lookup fails for any reason
    (connection error, HTTP error, read timeout, undecodable or malformed
    JSON, unexpected payload shape, asset not found), ``None`` is returned
    so callers can fall back to the original URL.

    Args:
        download_url: The URL to resolve.
        open_url_fn: A callable compatible with
            ``specify_cli.authentication.http.open_url``; it is called as
            ``open_url_fn(url, timeout=...)`` and must return a context
            manager whose value has a ``read()`` method.
        timeout: Per-request timeout in seconds.

    Returns:
        The resolved REST API asset URL, or ``None`` if resolution is not
        applicable or fails.
    """
    import json

    parsed = urlparse(download_url)
    parts = [unquote(part) for part in parsed.path.strip("/").split("/")]

    # Already a REST API asset URL — use it directly.
    if (
        parsed.hostname == "api.github.com"
        and len(parts) >= 6
        and parts[0] == "repos"
        and parts[3:5] == ["releases", "assets"]
    ):
        return download_url

    # Only handle github.com browser release download URLs.
    if parsed.hostname != "github.com":
        return None

    # Expecting /<owner>/<repo>/releases/download/<tag>/<asset>
    if len(parts) < 6 or parts[2:4] != ["releases", "download"]:
        return None

    owner, repo, tag = parts[0], parts[1], parts[4]
    asset_name = "/".join(parts[5:])

    # An empty segment (e.g. ".../download//asset.zip") can never name a real
    # release asset, so skip the pointless API round-trip.
    if not (owner and repo and tag and asset_name):
        return None

    # The segments were percent-decoded above; re-encode each one as a single
    # path segment so reserved characters ("/", "#", "?", …) cannot change the
    # shape of the API URL.
    release_url = (
        "https://api.github.com/repos/"
        f"{quote(owner, safe='')}/{quote(repo, safe='')}"
        f"/releases/tags/{quote(tag, safe='')}"
    )

    try:
        with open_url_fn(release_url, timeout=timeout) as response:
            release_data = json.loads(response.read())
    except (OSError, ValueError):
        # OSError covers urllib.error.URLError/HTTPError as well as socket
        # read timeouts and connection resets; ValueError covers
        # json.JSONDecodeError and UnicodeDecodeError for non-UTF-8 bodies.
        return None

    # Be defensive about the payload shape: anything other than an object
    # with a list of asset objects is treated as "asset not found".
    if not isinstance(release_data, dict):
        return None
    assets = release_data.get("assets")
    if not isinstance(assets, list):
        return None

    for asset in assets:
        if not isinstance(asset, dict):
            continue
        if asset.get("name") == asset_name and asset.get("url"):
            return str(asset["url"])

    return None
150+
151+
79152
def open_github_url(url: str, timeout: int = 10):
80153
"""Open a URL with GitHub auth, stripping the header on cross-host redirects.
81154

src/specify_cli/extensions.py

Lines changed: 7 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -1861,41 +1861,15 @@ def _resolve_github_release_asset_api_url(
18611861
download_url: str,
18621862
timeout: int = 60,
18631863
) -> Optional[str]:
1864-
"""Resolve a GitHub release asset URL to its API asset URL."""
1865-
import urllib.error
1866-
from urllib.parse import unquote, urlparse
1867-
1868-
parsed = urlparse(download_url)
1869-
parts = [unquote(part) for part in parsed.path.strip("/").split("/")]
1870-
if (
1871-
parsed.hostname == "api.github.com"
1872-
and len(parts) >= 6
1873-
and parts[:1] == ["repos"]
1874-
and parts[3:5] == ["releases", "assets"]
1875-
):
1876-
return download_url
1877-
1878-
if parsed.hostname != "github.com":
1879-
return None
1880-
1881-
if len(parts) < 6 or parts[2:4] != ["releases", "download"]:
1882-
return None
1883-
1884-
owner, repo, tag = parts[0], parts[1], parts[4]
1885-
asset_name = "/".join(parts[5:])
1886-
release_url = f"https://api.github.com/repos/{owner}/{repo}/releases/tags/{tag}"
1864+
"""Resolve a GitHub release asset URL to its API asset URL.
18871865
1888-
try:
1889-
with self._open_url(release_url, timeout=timeout) as response:
1890-
release_data = json.loads(response.read())
1891-
except (urllib.error.URLError, json.JSONDecodeError):
1892-
return None
1893-
1894-
for asset in release_data.get("assets", []):
1895-
if asset.get("name") == asset_name and asset.get("url"):
1896-
return str(asset["url"])
1866+
Delegates to the shared helper in :mod:`specify_cli._github_http`.
1867+
"""
1868+
from specify_cli._github_http import resolve_github_release_asset_api_url
18971869

1898-
return None
1870+
return resolve_github_release_asset_api_url(
1871+
download_url, self._open_url, timeout=timeout
1872+
)
18991873

19001874
def get_active_catalogs(self) -> List[CatalogEntry]:
19011875
"""Get the ordered list of active catalogs.

src/specify_cli/presets.py

Lines changed: 25 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1868,13 +1868,29 @@ def _make_request(self, url: str):
18681868
from specify_cli.authentication.http import build_request
18691869
return build_request(url)
18701870

1871-
def _open_url(self, url: str, timeout: int = 10):
1871+
def _open_url(
1872+
self,
1873+
url: str,
1874+
timeout: int = 10,
1875+
extra_headers: Optional[Dict[str, str]] = None,
1876+
):
18721877
"""Open a URL with provider-based auth, trying each configured provider.
18731878
18741879
Delegates to :func:`specify_cli.authentication.http.open_url`.
18751880
"""
18761881
from specify_cli.authentication.http import open_url
1877-
return open_url(url, timeout)
1882+
return open_url(url, timeout, extra_headers=extra_headers)
1883+
1884+
def _resolve_github_release_asset_api_url(
1885+
self,
1886+
download_url: str,
1887+
timeout: int = 60,
1888+
) -> Optional[str]:
1889+
"""Resolve a GitHub release asset URL to its REST API asset URL."""
1890+
from specify_cli._github_http import resolve_github_release_asset_api_url
1891+
return resolve_github_release_asset_api_url(
1892+
download_url, self._open_url, timeout=timeout
1893+
)
18781894

18791895
def _load_catalog_config(self, config_path: Path) -> Optional[List[PresetCatalogEntry]]:
18801896
"""Load catalog stack configuration from a YAML file.
@@ -2332,8 +2348,14 @@ def download_pack(
23322348
zip_filename = f"{pack_id}-{version}.zip"
23332349
zip_path = target_dir / zip_filename
23342350

2351+
extra_headers = None
2352+
resolved_download_url = self._resolve_github_release_asset_api_url(download_url)
2353+
if resolved_download_url:
2354+
download_url = resolved_download_url
2355+
extra_headers = {"Accept": "application/octet-stream"}
2356+
23352357
try:
2336-
with self._open_url(download_url, timeout=60) as response:
2358+
with self._open_url(download_url, timeout=60, extra_headers=extra_headers) as response:
23372359
zip_data = response.read()
23382360

23392361
zip_path.write_bytes(zip_data)

tests/test_github_http.py

Lines changed: 113 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,15 @@
11
"""Tests for GitHub-authenticated HTTP request helpers."""
22

3+
import json
34
import os
4-
from unittest.mock import patch
5+
from contextlib import contextmanager
6+
from unittest.mock import MagicMock, patch
57

68
import pytest
79

810
from specify_cli._github_http import (
911
build_github_request,
12+
resolve_github_release_asset_api_url,
1013
)
1114

1215

@@ -76,4 +79,112 @@ def test_no_auth_header_when_no_token(self):
7679
def test_missing_hostname_raises_value_error(self):
7780
"""build_github_request() must reject URLs with valid scheme but no hostname."""
7881
with pytest.raises(ValueError, match="url must include a hostname"):
79-
build_github_request("http://")
82+
build_github_request("http://")
83+
84+
85+
class TestResolveGitHubReleaseAssetApiUrl:
    """Unit tests covering resolve_github_release_asset_api_url()."""

    def _make_open_url_fn(self, release_json):
        """Build a stand-in opener whose response body is *release_json*."""

        @contextmanager
        def fake_open(url, timeout=None, extra_headers=None):
            response = MagicMock()
            response.read.return_value = json.dumps(release_json).encode()
            yield response

        return fake_open

    def _capture_release_lookup(self, browser_url):
        """Resolve *browser_url* against an empty release; return requested URLs."""
        requested = []

        @contextmanager
        def capturing_open(url, timeout=None, extra_headers=None):
            requested.append(url)
            response = MagicMock()
            response.read.return_value = json.dumps({"assets": []}).encode()
            yield response

        resolve_github_release_asset_api_url(browser_url, capturing_open)
        return requested

    def test_returns_none_for_non_github_url(self):
        """A URL on a non-GitHub host is not resolved."""
        unused_opener = lambda *a, **kw: None  # noqa: E731
        outcome = resolve_github_release_asset_api_url(
            "https://example.com/file.zip", unused_opener
        )
        assert outcome is None

    def test_returns_none_for_non_release_github_url(self):
        """A github.com URL that is not a release download is not resolved."""
        unused_opener = lambda *a, **kw: None  # noqa: E731
        outcome = resolve_github_release_asset_api_url(
            "https://github.com/org/repo/archive/refs/tags/v1.zip",
            unused_opener,
        )
        assert outcome is None

    def test_passthrough_for_existing_api_asset_url(self):
        """A REST API asset URL comes back unchanged."""
        api_url = "https://api.github.com/repos/org/repo/releases/assets/12345"
        unused_opener = lambda *a, **kw: None  # noqa: E731
        assert resolve_github_release_asset_api_url(api_url, unused_opener) == api_url

    def test_resolves_browser_url_to_api_url(self):
        """A browser release URL maps to the matching asset's API URL."""
        expected = "https://api.github.com/repos/org/repo/releases/assets/99"
        opener = self._make_open_url_fn(
            {"assets": [{"name": "pack.zip", "url": expected}]}
        )
        outcome = resolve_github_release_asset_api_url(
            "https://github.com/org/repo/releases/download/v1.0/pack.zip",
            opener,
        )
        assert outcome == "https://api.github.com/repos/org/repo/releases/assets/99"

    def test_returns_none_when_asset_not_found(self):
        """No asset in the release carries the requested name."""
        opener = self._make_open_url_fn(
            {
                "assets": [
                    {
                        "name": "other.zip",
                        "url": "https://api.github.com/repos/org/repo/releases/assets/1",
                    }
                ]
            }
        )
        outcome = resolve_github_release_asset_api_url(
            "https://github.com/org/repo/releases/download/v1/missing.zip",
            opener,
        )
        assert outcome is None

    def test_returns_none_on_network_error(self):
        """A URLError raised by the opener yields None."""
        import urllib.error

        @contextmanager
        def failing_open(url, timeout=None, extra_headers=None):
            raise urllib.error.URLError("network error")
            yield  # noqa: unreachable

        outcome = resolve_github_release_asset_api_url(
            "https://github.com/org/repo/releases/download/v1/pack.zip",
            failing_open,
        )
        assert outcome is None

    def test_tag_with_special_characters_is_url_encoded(self):
        """A tag containing '/' is percent-encoded in the API request URL."""
        requested = self._capture_release_lookup(
            "https://github.com/org/repo/releases/download/feature%2Fv1/pack.zip"
        )
        # "feature/v1" (decoded from %2F) must go back out as "feature%2Fv1".
        assert len(requested) == 1
        assert "releases/tags/feature%2Fv1" in requested[0]

    def test_tag_with_hash_is_url_encoded(self):
        """A tag containing '#' is percent-encoded in the API request URL."""
        requested = self._capture_release_lookup(
            "https://github.com/org/repo/releases/download/v1%23beta/pack.zip"
        )
        assert len(requested) == 1
        assert "releases/tags/v1%23beta" in requested[0]

0 commit comments

Comments
 (0)