Skip to content

Commit 71d159e

Browse files
ianpittwood and claude committed
fix(ci): fail fast on non-transient errors during source wait
The pre-flight source-digest wait was treating every oras fetch failure as a transient miss, so a genuine non-transient error (auth, bad reference) would poll for the full 10-minute timeout. `OrasWaitForSourcesWorkflow._is_available` now re-raises non-transient `BakeryToolRuntimeError`s instead of returning False, and `publish()` catches them to surface a clean `typer.Exit(1)` with the oras stderr rather than letting an unhandled traceback escape.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 84aa506 commit 71d159e

4 files changed

Lines changed: 80 additions & 7 deletions

File tree

posit-bakery/posit_bakery/cli/ci.py

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -294,6 +294,7 @@ def publish(
294294
"""
295295
# Imports kept local to mirror existing patterns and to avoid bloating
296296
# module load time when this command isn't invoked.
297+
from posit_bakery.error import BakeryToolRuntimeError
297298
from posit_bakery.plugins.builtin.oras.oras import (
298299
OrasIndexCopyWorkflow,
299300
OrasIndexCreateWorkflow,
@@ -365,10 +366,17 @@ def publish(
365366
all_sources = sorted({s for t in targets for s in t.get_merge_sources()})
366367
if all_sources:
367368
log.info(f"Waiting for {len(all_sources)} source digest(s) to be readable before publishing.")
368-
wait = OrasWaitForSourcesWorkflow(
369-
oras_bin=oras_bin,
370-
sources=all_sources,
371-
).run(dry_run=dry_run)
369+
try:
370+
wait = OrasWaitForSourcesWorkflow(
371+
oras_bin=oras_bin,
372+
sources=all_sources,
373+
).run(dry_run=dry_run)
374+
except BakeryToolRuntimeError as e:
375+
# A non-transient registry error (auth, bad reference, ...) while
376+
# probing sources is fatal and won't self-heal — surface it cleanly
377+
# rather than letting it escape as an unhandled traceback.
378+
log.error(f"Failed while waiting for source digests: {e.dump_stderr() or e}")
379+
raise typer.Exit(code=1)
372380
if not wait.success:
373381
log.error(f"Source digests not available: {wait.error}")
374382
raise typer.Exit(code=1)

posit-bakery/posit_bakery/plugins/builtin/oras/oras.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111

1212
from posit_bakery.error import BakeryToolRuntimeError
1313
from posit_bakery.image.image_target import ImageTarget, Tag
14-
from posit_bakery.retry import RetryPolicy, retry_on_transient
14+
from posit_bakery.retry import RetryPolicy, is_transient_error, retry_on_transient
1515
from posit_bakery.util import find_bin
1616

1717
log = logging.getLogger(__name__)
@@ -366,8 +366,10 @@ def _is_available(self, ref: str) -> bool:
366366
plain_http=self.plain_http,
367367
).run(dry_run=False)
368368
return True
369-
except BakeryToolRuntimeError:
370-
return False
369+
except BakeryToolRuntimeError as e:
370+
if is_transient_error(e):
371+
return False
372+
raise
371373

372374
def run(
373375
self,

posit-bakery/test/cli/test_ci_publish.py

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -165,3 +165,43 @@ def test_publish_aborts_when_sources_never_ready(tmp_path):
165165
assert result.exit_code == 1
166166
# Aborted before SOCI convert.
167167
fake_soci.execute.assert_not_called()
168+
169+
170+
def test_publish_surfaces_clean_error_on_non_transient_wait_failure(tmp_path):
171+
"""A non-transient registry error during the wait exits cleanly (code 1)
172+
rather than escaping as an unhandled traceback."""
173+
from posit_bakery.error import BakeryToolRuntimeError
174+
175+
sources = ["ghcr.io/posit-dev/test/tmp@sha256:amd64"]
176+
target = _fake_target("uid1", merge_sources=sources)
177+
target.settings.temp_registry = "ghcr.io/posit-dev"
178+
179+
fake_config = MagicMock()
180+
fake_config.base_path = tmp_path
181+
fake_config.load_build_metadata_from_file.return_value = ["uid1"]
182+
fake_config.get_image_target_by_uid.return_value = target
183+
184+
fake_wait_instance = MagicMock()
185+
fake_wait_instance.run.side_effect = BakeryToolRuntimeError(
186+
message="oras command failed",
187+
tool_name="oras",
188+
cmd=["oras", "manifest", "fetch"],
189+
stdout=b"",
190+
stderr=b"unauthorized: authentication required",
191+
)
192+
193+
fake_soci = MagicMock()
194+
195+
runner = CliRunner()
196+
with (
197+
patch("posit_bakery.cli.ci.BakeryConfig.from_context", return_value=fake_config),
198+
patch("posit_bakery.plugins.builtin.oras.oras.find_oras_bin", return_value="oras"),
199+
patch("posit_bakery.plugins.builtin.oras.oras.OrasWaitForSourcesWorkflow", return_value=fake_wait_instance),
200+
patch("posit_bakery.plugins.registry.get_plugin", return_value=fake_soci),
201+
):
202+
result = runner.invoke(app, ["ci", "publish", "meta.json"], env=_WIDE_TERM_ENV)
203+
204+
# Clean exit, not an unhandled exception.
205+
assert result.exit_code == 1
206+
assert result.exception is None or isinstance(result.exception, SystemExit)
207+
fake_soci.execute.assert_not_called()

posit-bakery/test/plugins/builtin/oras/test_oras.py

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -951,6 +951,29 @@ def sleep(seconds):
951951
assert result.missing == ["ghcr.io/posit-dev/test/tmp@sha256:b"]
952952
assert "still unreadable" in result.error
953953

954+
def test_non_transient_error_raises_immediately(self):
955+
"""A non-transient fetch error (e.g. auth) must not be polled on — it
956+
raises right away instead of burning the full timeout."""
957+
wf = OrasWaitForSourcesWorkflow(
958+
oras_bin="oras",
959+
sources=["ghcr.io/posit-dev/test/tmp@sha256:a"],
960+
poll_interval=5.0,
961+
timeout=600.0,
962+
)
963+
sleep = MagicMock()
964+
965+
def side_effect(cmd, capture_output):
966+
return subprocess.CompletedProcess(
967+
args=cmd, returncode=1, stdout=b"", stderr=b"unauthorized: authentication required"
968+
)
969+
970+
with patch("subprocess.run", side_effect=side_effect):
971+
with pytest.raises(BakeryToolRuntimeError):
972+
wf.run(sleep=sleep, now=lambda: 0.0)
973+
974+
# Failed fast: no backoff sleep, no waiting for the timeout.
975+
sleep.assert_not_called()
976+
954977
def test_dry_run_skips_polling(self):
955978
wf = OrasWaitForSourcesWorkflow(oras_bin="oras", sources=["ghcr.io/posit-dev/test/tmp@sha256:a"])
956979
with patch("subprocess.run") as mock_run:

0 commit comments

Comments
 (0)