NVIDIA-NeMo
diff --git a/architecture/dataset-builders.md b/architecture/dataset-builders.md
Lines changed: 1 addition & 1 deletion
diff --git a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/async_scheduler.py b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/async_scheduler.py
Lines changed: 383 additions & 89 deletions
diff --git a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/scheduling/queue.py b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/scheduling/queue.py
Lines changed: 7 additions & 2 deletions
diff --git a/packages/data-designer-engine/src/data_designer/engine/models/clients/errors.py b/packages/data-designer-engine/src/data_designer/engine/models/clients/errors.py
Lines changed: 1 addition & 0 deletions
diff --git a/packages/data-designer-engine/src/data_designer/engine/models/clients/model_request_executor.py b/packages/data-designer-engine/src/data_designer/engine/models/clients/model_request_executor.py
Lines changed: 16 additions & 13 deletions
diff --git a/packages/data-designer-engine/src/data_designer/engine/models/errors.py b/packages/data-designer-engine/src/data_designer/engine/models/errors.py
Lines changed: 8 additions & 0 deletions
diff --git a/packages/data-designer-engine/src/data_designer/engine/observability.py b/packages/data-designer-engine/src/data_designer/engine/observability.py
Lines changed: 2 additions & 0 deletions
@@ -35,7 +35,7 @@ Preparation (`_prepare_async_run`):
 4. Constructs `CompletionTracker`, `RowGroupBufferManager`, `AsyncTaskScheduler`
 5. Hooks `ProcessorRunner` for pre-batch and post-batch stages
 
-`AsyncTaskScheduler` runs on a dedicated async loop with frontier-driven dispatch, task-admission leases, salvage rounds for failed tasks, and order-dependent locks for columns that must execute sequentially. Ready frontier tasks enter `FairTaskQueue`, are selected through virtual-time ordering, and are committed only after `TaskAdmissionController` acquires the required scheduler resources. Salvage-exhausted tasks are dropped except for rate-limit failures, which stay deferred and retry after cooldown/backoff so 429s delay records rather than discard them.
+`AsyncTaskScheduler` runs on a dedicated async loop with frontier-driven dispatch, task-admission leases, salvage rounds for failed tasks, and order-dependent locks for columns that must execute sequentially. Ready frontier tasks enter `FairTaskQueue`, are selected through virtual-time ordering, and are committed only after `TaskAdmissionController` acquires the required scheduler resources. Salvage-exhausted tasks are dropped, with one exception: preserved retryable failures. Provider rate limits (429s) and local request-admission queue timeouts stay deferred and retry after cooldown/backoff, so provider throttling and local admission pressure delay records rather than discarding them.
 
 ### Execution Graph
 
 
@@ -88,9 +88,14 @@ def discard_where(self, predicate: Callable[[SchedulableTask], bool]) -> None:
             if predicate(item):
                 self.discard(task_id)
 
-    def select_next(self, is_eligible: Callable[[SchedulableTask, QueueView], bool]) -> QueueSelection | None:
+    def select_next(
+        self,
+        is_eligible: Callable[[SchedulableTask, QueueView], bool],
+        *,
+        view: QueueView | None = None,
+    ) -> QueueSelection | None:
         """Return the next eligible task without mutating queue state."""
-        view = self.view()
+        view = self.view() if view is None else view
         heap_copy = list(self._heap)
         heapq.heapify(heap_copy)
         active_seen: set[TaskGroupKey] = set()
 
@@ -24,6 +24,7 @@ class ProviderErrorKind(str, Enum):
     NOT_FOUND = "not_found"
     PERMISSION_DENIED = "permission_denied"
     RATE_LIMIT = "rate_limit"
+    REQUEST_ADMISSION_TIMEOUT = "request_admission_timeout"
     TIMEOUT = "timeout"
     UNPROCESSABLE_ENTITY = "unprocessable_entity"
     UNSUPPORTED_CAPABILITY = "unsupported_capability"
 
@@ -121,12 +121,7 @@ def _execute_sync_attempt(self, domain: RequestDomain, call: Callable[[], _T]) -
         try:
             lease = self._request_admission.acquire_sync(item)
         except RequestAdmissionError as exc:
-            raise ProviderError(
-                kind=ProviderErrorKind.TIMEOUT,
-                message=str(exc),
-                provider_name=self._provider_name,
-                model_name=self._model_id,
-            ) from exc
+            raise self._provider_error_from_request_admission(exc) from exc
         try:
             self._emit_model_event("model_request_started", item=item, lease=lease)
             result = call()
@@ -169,12 +164,7 @@ async def _execute_async_attempt(self, domain: RequestDomain, call: Callable[[],
         try:
             lease = await self._request_admission.acquire_async(item)
         except RequestAdmissionError as exc:
-            raise ProviderError(
-                kind=ProviderErrorKind.TIMEOUT,
-                message=str(exc),
-                provider_name=self._provider_name,
-                model_name=self._model_id,
-            ) from exc
+            raise self._provider_error_from_request_admission(exc) from exc
         except asyncio.CancelledError:
             raise
         try:
@@ -216,7 +206,7 @@ def _max_attempts(self) -> int:
     def _should_retry(self, exc: ProviderError, attempt: int) -> bool:
         if attempt >= self._max_attempts() - 1:
             return False
-        if isinstance(exc.__cause__, RequestAdmissionError):
+        if exc.kind == ProviderErrorKind.REQUEST_ADMISSION_TIMEOUT:
             return False
         if exc.kind == ProviderErrorKind.RATE_LIMIT:
             return False
@@ -249,6 +239,19 @@ def _release_provider_error(self, lease: RequestAdmissionLease, exc: ProviderErr
             outcome = RequestReleaseOutcome(kind="provider_failure")
         self._request_admission.release(lease, outcome)
 
+    def _provider_error_from_request_admission(self, exc: RequestAdmissionError) -> ProviderError:
+        kind = (
+            ProviderErrorKind.REQUEST_ADMISSION_TIMEOUT
+            if exc.decision.reason == "queue_timeout"
+            else ProviderErrorKind.TIMEOUT
+        )
+        return ProviderError(
+            kind=kind,
+            message=str(exc),
+            provider_name=self._provider_name,
+            model_name=self._model_id,
+        )
+
     def _item(self, domain: RequestDomain) -> RequestAdmissionItem:
         resolved = self._resource_resolver.resolve(
             provider_name=self._provider_name,
 
@@ -73,6 +73,9 @@ class ModelQuotaExceededError(DataDesignerError): ...
 class ModelTimeoutError(DataDesignerError): ...
 
 
+class ModelRequestAdmissionTimeoutError(ModelTimeoutError): ...
+
+
 class ModelContextWindowExceededError(DataDesignerError): ...
 
 
@@ -303,6 +306,7 @@ def _raise_from_provider_error(
     _KIND_MAP: dict[ProviderErrorKind, type[DataDesignerError]] = {
         ProviderErrorKind.RATE_LIMIT: ModelRateLimitError,
         ProviderErrorKind.QUOTA_EXCEEDED: ModelQuotaExceededError,
+        ProviderErrorKind.REQUEST_ADMISSION_TIMEOUT: ModelRequestAdmissionTimeoutError,
         ProviderErrorKind.TIMEOUT: ModelTimeoutError,
         ProviderErrorKind.NOT_FOUND: ModelNotFoundError,
         ProviderErrorKind.PERMISSION_DENIED: ModelPermissionDeniedError,
@@ -321,6 +325,10 @@ def _raise_from_provider_error(
             f"The request to model {model_name!r} timed out while {purpose}.",
             "Check your connection and try again. You may need to increase the timeout setting for the model.",
         ),
+        ProviderErrorKind.REQUEST_ADMISSION_TIMEOUT: (
+            f"Local request admission for model {model_name!r} timed out while {purpose}; the provider request was not sent.",
+            "Reduce request concurrency or tune the model's max_parallel_requests to match the endpoint's real capacity. For async dataset generation, also consider lowering RunConfig.max_in_flight_tasks.",
+        ),
         ProviderErrorKind.NOT_FOUND: (
             f"The specified model {model_name!r} could not be found while {purpose}.",
             f"Check that the model name is correct and supported by your model provider {model_provider_name!r} and try again.",
 
@@ -93,6 +93,8 @@ def reset(self, token: contextvars.Token) -> None:
     "admission_blocked",
     "group_capped",
     "request_pressure_advisory_skipped",
+    "request_pressure_advisory_blocked",
+    "dispatch_batch_yield",
     "task_lease_acquired",
     "admission_denied",
     "worker_spawned",