
Commit e6cf0c1

fix(client): batch embed retries, transcription reraise, and batch result typing
- Skip recursive micro-batch split on retryable errors in embed_batch
- Use bare raise in transcription batch error handler
- Define OnBatchResult as PEP 695 generic type alias

Made-with: Cursor
1 parent 8cca367 commit e6cf0c1

4 files changed

Lines changed: 51 additions & 2 deletions

File tree

- src/infermesh/_embedding.py
- src/infermesh/_transcription.py
- src/infermesh/types.py
- tests/test_client_batch.py

src/infermesh/_embedding.py

Lines changed: 10 additions & 0 deletions
```diff
@@ -34,6 +34,12 @@ def _validate_micro_batch_size(micro_batch_size: int) -> None:
         raise ValueError("``micro_batch_size`` must be a positive integer.")
 
 
+def _should_isolate_embedding_failure(self: LMClient, exc: Exception) -> bool:
+    """Return whether a failed micro-batch should be recursively isolated."""
+
+    return not isinstance(exc, self._retryable_exceptions)
+
+
 async def _aembed_one(
     self: LMClient,
     input_data: str,
@@ -105,6 +111,10 @@ async def _resolve_embedding_chunk_capture(
     except Exception as exc:
         if len(input_data) == 1:
             return [(start_index, None, exc)]
+        if not _should_isolate_embedding_failure(self, exc):
+            return [
+                (start_index + offset, None, exc) for offset in range(len(input_data))
+            ]
         midpoint = len(input_data) // 2
         left = await _resolve_embedding_chunk_capture(
             self,
```
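This change stops the recursive bisection early when the failure is retryable. Previously, any failing micro-batch was split in half recursively to isolate the offending item; a retryable error such as a rate limit typically affects the whole batch, so splitting only replays the same failure while multiplying requests. Below is a minimal standalone sketch of the pattern, with a stub `embed` call and an illustrative `RateLimitError` standing in for infermesh's real client machinery:

```python
import asyncio


class RateLimitError(Exception):
    """Stand-in for a retryable provider error."""


RETRYABLE_EXCEPTIONS = (RateLimitError,)


async def embed(texts: list[str]) -> list[list[float]]:
    # Stub provider call: always rate limited, as in the new test below.
    raise RateLimitError("rate limited")


async def resolve_chunk(
    texts: list[str], start: int
) -> list[tuple[int, list[float] | None, Exception | None]]:
    """Return (index, embedding, error) triples for one micro-batch."""
    try:
        vectors = await embed(texts)
    except Exception as exc:
        if len(texts) == 1:
            return [(start, None, exc)]
        if isinstance(exc, RETRYABLE_EXCEPTIONS):
            # A retryable failure hits the whole batch; splitting would only
            # replay the same error, so attribute it to every item at once.
            return [(start + i, None, exc) for i in range(len(texts))]
        # Non-retryable: bisect to isolate the offending item(s).
        mid = len(texts) // 2
        left = await resolve_chunk(texts[:mid], start)
        right = await resolve_chunk(texts[mid:], start + mid)
        return left + right
    return [(start + i, vec, None) for i, vec in enumerate(vectors)]


for index, vector, error in asyncio.run(resolve_chunk(["a", "b", "c", "d"], 0)):
    print(index, vector, error)
```

With four inputs the sketch makes one call and attributes the single rate-limit error to all four indices; without the short-circuit, the bisection tree would replay the same failure seven times, one call per node.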

src/infermesh/_transcription.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -110,7 +110,7 @@ def admit_inputs() -> int:
                 pending = list(active_tasks)
                 active_tasks.clear()
                 await cancel_tasks(pending)
-                raise exc
+                raise
             assert errors is not None
             errors[index] = exc
             if on_result is not None:
```
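The one-line change is about traceback fidelity: both spellings re-raise the same exception object, but `raise exc` appends the handler's own line as an extra frame in the traceback, while a bare `raise` leaves the traceback pointing at the original failure site. A small self-contained demonstration (not infermesh code):

```python
import traceback


def fail() -> None:
    raise ValueError("boom")


def reraise_bare() -> None:
    try:
        fail()
    except ValueError:
        raise  # traceback still ends at fail(); no extra frame added


def reraise_named() -> None:
    try:
        fail()
    except ValueError as exc:
        raise exc  # traceback gains this line as an extra entry


for fn in (reraise_bare, reraise_named):
    try:
        fn()
    except ValueError:
        print(f"--- {fn.__name__} ---")
        traceback.print_exc()
```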

src/infermesh/types.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -678,7 +678,7 @@ def __len__(self) -> int:
 TranscriptionBatchResult: TypeAlias = BatchResult[TranscriptionResult]
 """Type alias for a batch of transcription results."""
 
-OnBatchResult: TypeAlias = Callable[[int, T | None, BaseException | None], None] | None
+type OnBatchResult[T] = Callable[[int, T | None, BaseException | None], None] | None
 """Generic callback type for per-result notifications in batch methods.
 
 Called as ``on_result(index, result, error)`` each time a single request
```
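The new spelling uses the PEP 695 `type` statement (Python 3.12+), which scopes the type parameter `T` to the alias itself instead of a module-level `TypeVar`, so `OnBatchResult` can be subscripted like any generic alias. A short sketch of caller-side usage, where the `report` callback is hypothetical:

```python
from collections.abc import Callable

type OnBatchResult[T] = Callable[[int, T | None, BaseException | None], None] | None


def report(index: int, result: str | None, error: BaseException | None) -> None:
    # Matches OnBatchResult[str]: called once per finished request.
    status = "ok" if error is None else f"failed: {error!r}"
    print(f"item {index}: {status}")


callback: OnBatchResult[str] = report  # parameterized like any generic
if callback is not None:
    callback(0, "hello", None)
```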

tests/test_client_batch.py

Lines changed: 39 additions & 0 deletions
```diff
@@ -242,6 +242,45 @@ async def test_aembed_batch_recursively_isolates_bad_items(
     assert result.errors[2] is None
 
 
+@pytest.mark.asyncio
+async def test_aembed_batch_does_not_split_retryable_failures(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    calls: list[list[str]] = []
+
+    class RetryableEmbeddingFakeLiteLLM(FakeLiteLLM):
+        async def aembedding(self, **kwargs: Any) -> dict[str, Any]:
+            payload = list(kwargs["input"])
+            calls.append(payload)
+            raise self.RateLimitError("rate limited")
+
+    monkeypatch.setattr(
+        LMClient,
+        "_create_litellm_module",
+        lambda self: RetryableEmbeddingFakeLiteLLM(),
+    )
+    client = LMClient(
+        model="openai/test",
+        api_base="http://localhost",
+        max_retries=0,
+    )
+
+    batch = await client.aembed_batch(
+        ["a", "b", "c", "d"],
+        micro_batch_size=4,
+        return_exceptions=True,
+    )
+
+    assert calls == [["a", "b", "c", "d"]]
+    assert batch.errors is not None
+    assert all(result is None for result in batch.results)
+    assert all(
+        isinstance(error, RetryableEmbeddingFakeLiteLLM.RateLimitError)
+        for error in batch.errors
+    )
+    client.close()
+
+
 @pytest.mark.asyncio
 async def test_aembed_batch_return_exceptions_false_raises(
     failing_fake_client: LMClient,
```
