From 78b666eb68da10cf8959d9bccd3e2f574b32e2d5 Mon Sep 17 00:00:00 2001
From: kapoor28 <kapoor28@wisc.edu>
Date: Wed, 10 Jun 2026 11:45:03 -0700
Subject: [PATCH 1/2] add structured error types and retry logic for 429/5xx

---
 src/gumloop/__init__.py  |  12 +++
 src/gumloop/_client.py   |   7 ++
 src/gumloop/_http.py     | 118 ++++++++++++++++++------
 src/gumloop/errors.py    |  47 +++++++++-
 tests/sdk/test_errors.py | 187 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 340 insertions(+), 31 deletions(-)
 create mode 100644 tests/sdk/test_errors.py

diff --git a/src/gumloop/__init__.py b/src/gumloop/__init__.py
index 197f87e..b16ad9f 100644
--- a/src/gumloop/__init__.py
+++ b/src/gumloop/__init__.py
@@ -3,7 +3,13 @@
 from gumloop._client import GumloopClient
 from gumloop.errors import APIStatusError
 from gumloop.errors import AuthenticationError
+from gumloop.errors import BadRequestError
 from gumloop.errors import GumloopError
+from gumloop.errors import NotFoundError
+from gumloop.errors import PermissionDeniedError
+from gumloop.errors import RateLimitError
+from gumloop.errors import ServerError
+from gumloop.errors import UnprocessableEntityError
 from gumloop.oauth import OAuth
 
 __version__ = "0.3.1"
@@ -11,8 +17,14 @@
     "APIStatusError",
     "AsyncGumloop",
     "AuthenticationError",
+    "BadRequestError",
     "Gumloop",
     "GumloopClient",
     "GumloopError",
+    "NotFoundError",
     "OAuth",
+    "PermissionDeniedError",
+    "RateLimitError",
+    "ServerError",
+    "UnprocessableEntityError",
 ]
diff --git a/src/gumloop/_client.py b/src/gumloop/_client.py
index b5632c1..52d1be9 100644
--- a/src/gumloop/_client.py
+++ b/src/gumloop/_client.py
@@ -7,6 +7,7 @@
 
 import httpx
 
+from gumloop._http import DEFAULT_MAX_RETRIES
 from gumloop._http import AsyncHttpClient
 from gumloop._http import HttpClient
 from gumloop.oauth import OAuth
@@ -55,6 +56,7 @@ def __init__(
         stream_base_url: str | None = None,
         timeout: float = DEFAULT_TIMEOUT,
         stream_timeout: float | None = DEFAULT_STREAM_TIMEOUT,
+        max_retries: int = DEFAULT_MAX_RETRIES,
     ) -> None:
         self.api_key = api_key
         self.access_token = access_token or api_key or os.environ.get("GUMLOOP_ACCESS_TOKEN")
@@ -67,6 +69,7 @@ def __init__(
         self.stream_base_url = (stream_base_url or _derive_stream_base_url(self.base_url)).rstrip("/")
         self.timeout = timeout
         self.stream_timeout = stream_timeout
+        self.max_retries = max_retries
 
         self._http = HttpClient(
             base_url=self.base_url,
@@ -75,6 +78,7 @@ def __init__(
             user_id=self.user_id,
             timeout=self.timeout,
             stream_timeout=self.stream_timeout,
+            max_retries=self.max_retries,
         )
 
         self.agents = Agents(self._http)
@@ -110,6 +114,7 @@ def __init__(
         stream_base_url: str | None = None,
         timeout: float = DEFAULT_TIMEOUT,
         stream_timeout: float | None = DEFAULT_STREAM_TIMEOUT,
+        max_retries: int = DEFAULT_MAX_RETRIES,
     ) -> None:
         self.api_key = api_key
         self.access_token = access_token or api_key or os.environ.get("GUMLOOP_ACCESS_TOKEN")
@@ -119,6 +124,7 @@ def __init__(
         self.stream_base_url = (stream_base_url or _derive_stream_base_url(self.base_url)).rstrip("/")
         self.timeout = timeout
         self.stream_timeout = stream_timeout
+        self.max_retries = max_retries
 
         self._http = AsyncHttpClient(
             base_url=self.base_url,
@@ -127,6 +133,7 @@ def __init__(
             user_id=self.user_id,
             timeout=self.timeout,
             stream_timeout=self.stream_timeout,
+            max_retries=self.max_retries,
         )
 
         self.agents = AsyncAgents(self._http)
diff --git a/src/gumloop/_http.py b/src/gumloop/_http.py
index 3380aef..8877272 100644
--- a/src/gumloop/_http.py
+++ b/src/gumloop/_http.py
@@ -1,6 +1,9 @@
 from __future__ import annotations
 
+import asyncio
 import logging
+import random
+import time
 from collections.abc import AsyncIterator
 from collections.abc import Iterator
 from collections.abc import Mapping
@@ -13,7 +16,10 @@
 from pydantic import BaseModel
 from pydantic import ValidationError
 
+from gumloop.errors import APIStatusError
 from gumloop.errors import AuthenticationError
+from gumloop.errors import RateLimitError
+from gumloop.errors import ServerError
 from gumloop.errors import to_api_error
 from gumloop.types import StreamEvent
 
@@ -22,6 +28,11 @@
 _DONE_SENTINEL = "[DONE]"
 _T = TypeVar("_T", bound=BaseModel)
 
+DEFAULT_MAX_RETRIES = 2
+# Base delay in seconds for backoff; actual delay is uniform(0, min(base * 2^attempt, max delay)).
+_RETRY_BASE_DELAY = 0.5
+_RETRY_MAX_DELAY = 60.0
+
 
 def _auth_headers(access_token: str | None, user_id: str | None) -> dict[str, str]:
     if not access_token:
@@ -41,6 +52,33 @@ def _omit_none_params(params: Mapping[str, Any] | None) -> dict[str, Any] | None
     return {k: v for k, v in params.items() if v is not None}
 
 
+def _should_retry(exc: APIStatusError) -> bool:
+    # Retry on rate-limit and transient server errors; never retry client errors.
+    return isinstance(exc, (RateLimitError, ServerError))
+
+
+def _retry_delay(attempt: int, retry_after: float | None) -> float:
+    """Return how many seconds to sleep before retry number ``attempt`` (0-based).
+
+    A ``Retry-After`` hint wins but is clamped to ``[0, _RETRY_MAX_DELAY]`` (NaN -> 0) so a
+    bad header cannot stall or crash the sleep; otherwise full-jitter backoff is used.
+    """
+    if retry_after is not None:
+        return max(0.0, min(retry_after, _RETRY_MAX_DELAY))
+    cap = min(_RETRY_BASE_DELAY * (2**attempt), _RETRY_MAX_DELAY)
+    return random.uniform(0, cap)
+
+
+def _parse_retry_after(response: httpx.Response) -> float | None:
+    raw = response.headers.get("retry-after")
+    if raw is None:
+        return None
+    try:
+        return float(raw)
+    except ValueError:
+        return None
+
+
 def _decode_sse(event: ServerSentEvent) -> StreamEvent:
     try:
         decoded: Any = event.json() if event.data else {}
@@ -71,11 +109,13 @@ def __init__(
         user_id: str | None,
         timeout: float,
         stream_timeout: float | None,
+        max_retries: int = DEFAULT_MAX_RETRIES,
     ) -> None:
         self.access_token = access_token
         self.user_id = user_id
         self._stream_base_url = stream_base_url.rstrip("/")
         self._stream_timeout = stream_timeout
+        self._max_retries = max_retries
         self._client = httpx.Client(base_url=base_url.rstrip("/"), timeout=timeout)
 
     def close(self) -> None:
@@ -119,15 +159,18 @@ def post_to_stream_host(self, path: str, *, json: Any = None) -> Any:
         # the api host has no handler for them.
         headers = _auth_headers(self.access_token, self.user_id)
         headers["Content-Type"] = "application/json"
-        response = self._client.post(
-            f"{self._stream_base_url}/{path.lstrip('/')}",
-            headers=headers,
-            timeout=self._stream_timeout,
-            json=json,
-        )
-        if response.status_code >= 400:
-            raise to_api_error(response)
-        return response.json() if response.content else None
+        url = f"{self._stream_base_url}/{path.lstrip('/')}"
+        for attempt in range(self._max_retries + 1):
+            response = self._client.post(url, headers=headers, timeout=self._stream_timeout, json=json)
+            if response.status_code < 400:
+                return response.json() if response.content else None
+            exc = to_api_error(response)
+            if attempt < self._max_retries and _should_retry(exc):
+                delay = _retry_delay(attempt, _parse_retry_after(response))
+                logger.debug("retrying stream-host request (attempt %d, delay %.2fs)", attempt + 1, delay)
+                time.sleep(delay)
+                continue
+            raise exc
 
     def stream(
         self,
@@ -184,7 +227,7 @@ def stream_typed(
                     yield response_model.model_validate_json(event.data)
                 except ValidationError:
                     # Server-side mid-stream error frames or schema-drift events
-                    # land here. 
+                    # land here.
                     logger.debug("dropped non-%s SSE: %s", response_model.__name__, event.data)
                     continue
 
@@ -194,10 +237,17 @@ def _request(self, method: str, path: str, **kwargs: Any) -> Any:
         headers = _auth_headers(self.access_token, self.user_id)
         if not kwargs.get("files"):
             headers["Content-Type"] = "application/json"
-        response = self._client.request(method, path, headers=headers, **kwargs)
-        if response.status_code >= 400:
-            raise to_api_error(response)
-        return response.json() if response.content else None
+        for attempt in range(self._max_retries + 1):
+            response = self._client.request(method, path, headers=headers, **kwargs)
+            if response.status_code < 400:
+                return response.json() if response.content else None
+            exc = to_api_error(response)
+            if attempt < self._max_retries and _should_retry(exc):
+                delay = _retry_delay(attempt, _parse_retry_after(response))
+                logger.debug("retrying %s %s (attempt %d, delay %.2fs)", method, path, attempt + 1, delay)
+                time.sleep(delay)
+                continue
+            raise exc
 
 
 class AsyncHttpClient:
@@ -212,11 +262,13 @@ def __init__(
         user_id: str | None,
         timeout: float,
         stream_timeout: float | None,
+        max_retries: int = DEFAULT_MAX_RETRIES,
     ) -> None:
         self.access_token = access_token
         self.user_id = user_id
         self._stream_base_url = stream_base_url.rstrip("/")
         self._stream_timeout = stream_timeout
+        self._max_retries = max_retries
         self._client = httpx.AsyncClient(base_url=base_url.rstrip("/"), timeout=timeout)
 
     async def aclose(self) -> None:
@@ -257,15 +309,18 @@ async def delete(self, path: str) -> Any:
     async def post_to_stream_host(self, path: str, *, json: Any = None) -> Any:
         headers = _auth_headers(self.access_token, self.user_id)
         headers["Content-Type"] = "application/json"
-        response = await self._client.post(
-            f"{self._stream_base_url}/{path.lstrip('/')}",
-            headers=headers,
-            timeout=self._stream_timeout,
-            json=json,
-        )
-        if response.status_code >= 400:
-            raise to_api_error(response)
-        return response.json() if response.content else None
+        url = f"{self._stream_base_url}/{path.lstrip('/')}"
+        for attempt in range(self._max_retries + 1):
+            response = await self._client.post(url, headers=headers, timeout=self._stream_timeout, json=json)
+            if response.status_code < 400:
+                return response.json() if response.content else None
+            exc = to_api_error(response)
+            if attempt < self._max_retries and _should_retry(exc):
+                delay = _retry_delay(attempt, _parse_retry_after(response))
+                logger.debug("retrying stream-host request (attempt %d, delay %.2fs)", attempt + 1, delay)
+                await asyncio.sleep(delay)
+                continue
+            raise exc
 
     async def stream(
         self,
@@ -320,7 +375,7 @@ async def stream_typed(
                     yield response_model.model_validate_json(event.data)
                 except ValidationError:
                     # Server-side mid-stream error frames or schema-drift events
-                    # land here. 
+                    # land here.
                     logger.debug("dropped non-%s SSE: %s", response_model.__name__, event.data)
                     continue
 
@@ -328,7 +383,14 @@ async def _request(self, method: str, path: str, **kwargs: Any) -> Any:
         headers = _auth_headers(self.access_token, self.user_id)
         if not kwargs.get("files"):
             headers["Content-Type"] = "application/json"
-        response = await self._client.request(method, path, headers=headers, **kwargs)
-        if response.status_code >= 400:
-            raise to_api_error(response)
-        return response.json() if response.content else None
+        for attempt in range(self._max_retries + 1):
+            response = await self._client.request(method, path, headers=headers, **kwargs)
+            if response.status_code < 400:
+                return response.json() if response.content else None
+            exc = to_api_error(response)
+            if attempt < self._max_retries and _should_retry(exc):
+                delay = _retry_delay(attempt, _parse_retry_after(response))
+                logger.debug("retrying %s %s (attempt %d, delay %.2fs)", method, path, attempt + 1, delay)
+                await asyncio.sleep(delay)
+                continue
+            raise exc
diff --git a/src/gumloop/errors.py b/src/gumloop/errors.py
index 0c8665e..5831e9b 100644
--- a/src/gumloop/errors.py
+++ b/src/gumloop/errors.py
@@ -27,9 +27,43 @@ def __init__(self, message: str, *, status_code: int, body: Any = None) -> None:
         self.details = self.error.get("details", {}) if isinstance(self.error, dict) else {}
 
 
+class BadRequestError(APIStatusError):
+    """HTTP 400 — the request was malformed or contained invalid parameters."""
+
+
+class PermissionDeniedError(APIStatusError):
+    """HTTP 403 — the caller does not have permission to perform this action."""
+
+
+class NotFoundError(APIStatusError):
+    """HTTP 404 — the requested resource does not exist."""
+
+
+class UnprocessableEntityError(APIStatusError):
+    """HTTP 422 — the request was well-formed but semantically invalid."""
+
+
+class RateLimitError(APIStatusError):
+    """HTTP 429 — too many requests; back off and retry."""
+
+
+class ServerError(APIStatusError):
+    """HTTP 5xx — an unexpected error occurred on the Gumloop server."""
+
+
+_STATUS_MAP: dict[int, type[APIStatusError]] = {
+    400: BadRequestError,
+    403: PermissionDeniedError,
+    404: NotFoundError,
+    422: UnprocessableEntityError,
+    429: RateLimitError,
+}
+
+
 def to_api_error(response: httpx.Response) -> APIStatusError:
-    """Translate a non-success ``httpx.Response`` into :class:`APIStatusError`,
-    extracting the backend error envelope's ``message`` when present."""
+    """Translate a non-success ``httpx.Response`` into the most specific
+    :class:`APIStatusError` subclass available, extracting the backend error
+    envelope's ``message`` when present."""
     try:
         body: Any = response.json()
     except ValueError:
@@ -40,4 +74,11 @@ def to_api_error(response: httpx.Response) -> APIStatusError:
         if isinstance(error, dict)
         else f"Gumloop API returned HTTP {response.status_code}"
     )
-    return APIStatusError(message, status_code=response.status_code, body=body)
+    cls: type[APIStatusError]
+    if response.status_code in _STATUS_MAP:
+        cls = _STATUS_MAP[response.status_code]
+    elif response.status_code >= 500:
+        cls = ServerError
+    else:
+        cls = APIStatusError
+    return cls(message, status_code=response.status_code, body=body)
diff --git a/tests/sdk/test_errors.py b/tests/sdk/test_errors.py
new file mode 100644
index 0000000..8554b72
--- /dev/null
+++ b/tests/sdk/test_errors.py
@@ -0,0 +1,187 @@
+"""Tests for the error hierarchy and retry logic."""
+
+from __future__ import annotations
+
+import asyncio
+import time
+
+import httpx
+import pytest
+import respx
+
+from gumloop import AsyncGumloop
+from gumloop import Gumloop
+from gumloop.errors import APIStatusError
+from gumloop.errors import BadRequestError
+from gumloop.errors import NotFoundError
+from gumloop.errors import PermissionDeniedError
+from gumloop.errors import RateLimitError
+from gumloop.errors import ServerError
+from gumloop.errors import UnprocessableEntityError
+from gumloop.errors import to_api_error
+from tests.sdk.helpers import API_BASE
+
+
+# ---------------------------------------------------------------------------
+# to_api_error dispatch
+# ---------------------------------------------------------------------------
+
+
+def _fake_response(status: int, body: dict | None = None) -> httpx.Response:
+    import json
+
+    content = json.dumps(body).encode() if body else b""
+    return httpx.Response(status, content=content, headers={"content-type": "application/json"})
+
+
+@pytest.mark.parametrize(
+    ("status", "expected_cls"),
+    [
+        (400, BadRequestError),
+        (403, PermissionDeniedError),
+        (404, NotFoundError),
+        (422, UnprocessableEntityError),
+        (429, RateLimitError),
+        (500, ServerError),
+        (503, ServerError),
+        (418, APIStatusError),  # unknown 4xx falls back to base class
+    ],
+)
+def test_to_api_error_dispatches_correct_subclass(status: int, expected_cls: type) -> None:
+    exc = to_api_error(_fake_response(status))
+    assert isinstance(exc, expected_cls)
+    assert exc.status_code == status
+
+
+def test_to_api_error_extracts_message_from_envelope() -> None:
+    body = {"error": {"message": "quota exceeded", "code": "rate_limit"}}
+    exc = to_api_error(_fake_response(429, body))
+    assert "quota exceeded" in str(exc)
+    assert exc.code == "rate_limit"
+
+
+def test_to_api_error_falls_back_to_generic_message_when_no_envelope() -> None:
+    exc = to_api_error(_fake_response(503))
+    assert "503" in str(exc)
+
+
+def test_api_status_error_is_catchable_as_base_class() -> None:
+    exc = to_api_error(_fake_response(404))
+    assert isinstance(exc, APIStatusError)
+
+
+# ---------------------------------------------------------------------------
+# Retry logic — sync
+# ---------------------------------------------------------------------------
+
+
+@respx.mock
+def test_retries_on_500_then_succeeds(client: Gumloop) -> None:
+    route = respx.get(f"{API_BASE}/agents").mock(
+        side_effect=[
+            httpx.Response(500, json={"error": {"message": "internal error"}}),
+            httpx.Response(200, json={"agents": []}),
+        ]
+    )
+
+    result = client.agents.list()
+
+    assert result.agents == []
+    assert route.call_count == 2
+
+
+@respx.mock
+def test_retries_on_429_and_honours_retry_after(monkeypatch: pytest.MonkeyPatch, client: Gumloop) -> None:
+    slept: list[float] = []
+    monkeypatch.setattr(time, "sleep", lambda s: slept.append(s))
+
+    route = respx.get(f"{API_BASE}/agents").mock(
+        side_effect=[
+            httpx.Response(429, headers={"retry-after": "1"}, json={}),
+            httpx.Response(200, json={"agents": []}),
+        ]
+    )
+
+    result = client.agents.list()
+
+    assert result.agents == []
+    assert route.call_count == 2
+    assert slept == [1.0]
+
+
+@respx.mock
+def test_raises_after_max_retries_exhausted(client: Gumloop, monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.setattr(time, "sleep", lambda _: None)
+
+    respx.get(f"{API_BASE}/agents").mock(
+        return_value=httpx.Response(500, json={"error": {"message": "still broken"}})
+    )
+
+    with pytest.raises(ServerError):
+        client.agents.list()
+
+
+@respx.mock
+def test_does_not_retry_on_client_errors(client: Gumloop) -> None:
+    route = respx.get(f"{API_BASE}/agents").mock(
+        return_value=httpx.Response(404, json={})
+    )
+
+    with pytest.raises(NotFoundError):
+        client.agents.list()
+
+    assert route.call_count == 1
+
+
+@respx.mock
+def test_no_retry_client_zero_max_retries() -> None:
+    client = Gumloop(access_token="token", max_retries=0)
+    route = respx.get(f"{API_BASE}/agents").mock(
+        return_value=httpx.Response(500, json={})
+    )
+
+    with pytest.raises(ServerError):
+        client.agents.list()
+
+    assert route.call_count == 1
+
+
+# ---------------------------------------------------------------------------
+# Retry logic — async
+# ---------------------------------------------------------------------------
+
+
+@respx.mock
+def test_async_retries_on_500_then_succeeds(monkeypatch: pytest.MonkeyPatch) -> None:
+    # Zero the backoff base so the retry sleeps for 0s instead of a random real delay.
+    monkeypatch.setattr("gumloop._http._RETRY_BASE_DELAY", 0.0)
+
+    failed = httpx.Response(500, json={"error": {"message": "internal error"}})
+    ok = httpx.Response(200, json={"agents": []})
+    route = respx.get(f"{API_BASE}/agents").mock(side_effect=[failed, ok])
+
+    async def run() -> None:
+        async with AsyncGumloop(access_token="token") as client:
+            assert (await client.agents.list()).agents == []
+
+    asyncio.run(run())
+    assert route.call_count == 2
+
+
+@respx.mock
+def test_async_raises_after_max_retries_exhausted(monkeypatch: pytest.MonkeyPatch) -> None:
+    async def _noop_sleep(_: float) -> None:
+        pass
+
+    monkeypatch.setattr(asyncio, "sleep", _noop_sleep)
+
+    respx.get(f"{API_BASE}/agents").mock(
+        return_value=httpx.Response(503, json={})
+    )
+
+    async def run() -> None:
+        async with AsyncGumloop(access_token="token") as client:
+            with pytest.raises(ServerError):
+                await client.agents.list()
+
+    asyncio.run(run())

From b656683679625852e0e94a63f0243858d7b2c172 Mon Sep 17 00:00:00 2001
From: kapoor28 <kapoor28@wisc.edu>
Date: Wed, 10 Jun 2026 12:00:20 -0700
Subject: [PATCH 2/2] address review: idempotent-only 5xx retries, HTTP-date
 Retry-After, sleep patch in tests

---
 src/gumloop/_http.py     | 36 ++++++++++++++++++++-----
 tests/sdk/test_errors.py | 57 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 85 insertions(+), 8 deletions(-)

diff --git a/src/gumloop/_http.py b/src/gumloop/_http.py
index 8877272..13a5471 100644
--- a/src/gumloop/_http.py
+++ b/src/gumloop/_http.py
@@ -52,9 +52,20 @@ def _omit_none_params(params: Mapping[str, Any] | None) -> dict[str, Any] | None
     return {k: v for k, v in params.items() if v is not None}
 
 
-def _should_retry(exc: APIStatusError) -> bool:
-    # Retry on rate-limit and transient server errors; never retry client errors.
-    return isinstance(exc, (RateLimitError, ServerError))
+_IDEMPOTENT_METHODS = frozenset({"GET", "HEAD", "DELETE", "OPTIONS", "PUT"})
+
+
+def _should_retry(exc: APIStatusError, method: str) -> bool:
+    # Only RateLimitError and ServerError are retry candidates; anything else is final.
+    if not isinstance(exc, (RateLimitError, ServerError)):
+        return False
+    # Methods outside _IDEMPOTENT_METHODS (e.g. POST/PATCH) are not retried on
+    # 5xx: the error may arrive after the server already committed the write,
+    # so a retry could duplicate the mutation. They are still retried on 429,
+    # on the assumption that a rate-limited request was rejected unprocessed.
+    if method.upper() not in _IDEMPOTENT_METHODS and isinstance(exc, ServerError):
+        return False
+    return True
 
 
 def _retry_delay(attempt: int, retry_after: float | None) -> float:
@@ -70,12 +81,23 @@ def _retry_delay(attempt: int, retry_after: float | None) -> float:
 
 
 def _parse_retry_after(response: httpx.Response) -> float | None:
+    import email.utils
+
     raw = response.headers.get("retry-after")
     if raw is None:
         return None
     try:
         return float(raw)
     except ValueError:
+        pass
+    # RFC 7231 also allows an HTTP-date: "Retry-After: Wed, 21 Oct 2015 07:28:00 GMT"
+    try:
+        dt = email.utils.parsedate_to_datetime(raw)
+        import datetime
+
+        delta = (dt - datetime.datetime.now(tz=datetime.timezone.utc)).total_seconds()
+        return max(delta, 0.0)
+    except Exception:
         return None
 
 
@@ -165,7 +187,7 @@ def post_to_stream_host(self, path: str, *, json: Any = None) -> Any:
             if response.status_code < 400:
                 return response.json() if response.content else None
             exc = to_api_error(response)
-            if attempt < self._max_retries and _should_retry(exc):
+            if attempt < self._max_retries and _should_retry(exc, "POST"):
                 delay = _retry_delay(attempt, _parse_retry_after(response))
                 logger.debug("retrying stream-host request (attempt %d, delay %.2fs)", attempt + 1, delay)
                 time.sleep(delay)
@@ -242,7 +264,7 @@ def _request(self, method: str, path: str, **kwargs: Any) -> Any:
             if response.status_code < 400:
                 return response.json() if response.content else None
             exc = to_api_error(response)
-            if attempt < self._max_retries and _should_retry(exc):
+            if attempt < self._max_retries and _should_retry(exc, method):
                 delay = _retry_delay(attempt, _parse_retry_after(response))
                 logger.debug("retrying %s %s (attempt %d, delay %.2fs)", method, path, attempt + 1, delay)
                 time.sleep(delay)
@@ -315,7 +337,7 @@ async def post_to_stream_host(self, path: str, *, json: Any = None) -> Any:
             if response.status_code < 400:
                 return response.json() if response.content else None
             exc = to_api_error(response)
-            if attempt < self._max_retries and _should_retry(exc):
+            if attempt < self._max_retries and _should_retry(exc, "POST"):
                 delay = _retry_delay(attempt, _parse_retry_after(response))
                 logger.debug("retrying stream-host request (attempt %d, delay %.2fs)", attempt + 1, delay)
                 await asyncio.sleep(delay)
@@ -388,7 +410,7 @@ async def _request(self, method: str, path: str, **kwargs: Any) -> Any:
             if response.status_code < 400:
                 return response.json() if response.content else None
             exc = to_api_error(response)
-            if attempt < self._max_retries and _should_retry(exc):
+            if attempt < self._max_retries and _should_retry(exc, method):
                 delay = _retry_delay(attempt, _parse_retry_after(response))
                 logger.debug("retrying %s %s (attempt %d, delay %.2fs)", method, path, attempt + 1, delay)
                 await asyncio.sleep(delay)
diff --git a/tests/sdk/test_errors.py b/tests/sdk/test_errors.py
index 8554b72..1330942 100644
--- a/tests/sdk/test_errors.py
+++ b/tests/sdk/test_errors.py
@@ -76,7 +76,9 @@ def test_api_status_error_is_catchable_as_base_class() -> None:
 
 
 @respx.mock
-def test_retries_on_500_then_succeeds(client: Gumloop) -> None:
+def test_retries_on_500_then_succeeds(monkeypatch: pytest.MonkeyPatch, client: Gumloop) -> None:
+    monkeypatch.setattr(time, "sleep", lambda _: None)
+
     route = respx.get(f"{API_BASE}/agents").mock(
         side_effect=[
             httpx.Response(500, json={"error": {"message": "internal error"}}),
@@ -146,6 +148,59 @@ def test_no_retry_client_zero_max_retries() -> None:
     assert route.call_count == 1
 
 
+@respx.mock
+def test_retry_after_http_date_is_parsed(monkeypatch: pytest.MonkeyPatch, client: Gumloop) -> None:
+    slept: list[float] = []
+    monkeypatch.setattr(time, "sleep", lambda s: slept.append(s))
+
+    # Use a date far in the future so the computed delta is clearly > 0.
+    future_date = "Wed, 01 Jan 2099 00:00:00 GMT"
+    respx.get(f"{API_BASE}/agents").mock(
+        side_effect=[
+            httpx.Response(429, headers={"retry-after": future_date}, json={}),
+            httpx.Response(200, json={"agents": []}),
+        ]
+    )
+
+    client.agents.list()
+
+    assert len(slept) == 1
+    assert slept[0] > 0  # parsed as a positive delay, not silently dropped
+
+
+@respx.mock
+def test_post_does_not_retry_on_5xx(monkeypatch: pytest.MonkeyPatch, client: Gumloop) -> None:
+    # POST is non-idempotent; a 5xx after a commit would duplicate the write.
+    monkeypatch.setattr(time, "sleep", lambda _: None)
+
+    route = respx.post(f"{API_BASE}/agents").mock(
+        return_value=httpx.Response(500, json={"error": {"message": "upstream error"}})
+    )
+
+    with pytest.raises(ServerError):
+        client.agents.create(name="Test", model_name="auto")
+
+    assert route.call_count == 1  # no retries
+
+
+@respx.mock
+def test_post_does_retry_on_429(monkeypatch: pytest.MonkeyPatch, client: Gumloop) -> None:
+    # 429 is safe to retry even for POST — the server rejected it before processing.
+    monkeypatch.setattr(time, "sleep", lambda _: None)
+
+    route = respx.post(f"{API_BASE}/agents").mock(
+        side_effect=[
+            httpx.Response(429, json={}),
+            httpx.Response(201, json={"agent": {"id": "a1", "name": "Test"}}),
+        ]
+    )
+
+    result = client.agents.create(name="Test", model_name="auto")
+
+    assert result.agent.id == "a1"
+    assert route.call_count == 2
+
+
 # ---------------------------------------------------------------------------
 # Retry logic — async
 # ---------------------------------------------------------------------------