diff --git a/api/oss/tests/pytest/unit/evaluators/test_catalog_types.py b/api/oss/tests/pytest/unit/evaluators/test_catalog_types.py index 7451344f83..ea9477a8c5 100644 --- a/api/oss/tests/pytest/unit/evaluators/test_catalog_types.py +++ b/api/oss/tests/pytest/unit/evaluators/test_catalog_types.py @@ -22,4 +22,31 @@ def test_catalog_types_include_message_messages_model_and_prompt_template(): ] == "model" ) + prompt_properties = by_key["prompt-template"]["properties"] + fallback_schema = prompt_properties["fallback_llm_configs"] + retry_schema = prompt_properties["retry_policy"] + fallback_policy_schema = prompt_properties["fallback_policy"] + fallback_array_schema = next( + option for option in fallback_schema["anyOf"] if option.get("type") == "array" + ) + retry_object_schema = next( + option for option in retry_schema["anyOf"] if option.get("type") == "object" + ) + assert fallback_schema["default"] is None + assert ( + fallback_array_schema["items"]["properties"]["model"]["x-ag-type-ref"] + == "model" + ) + assert "model" in fallback_array_schema["items"]["required"] + assert fallback_policy_schema["x-ag-type"] == "choice" + assert fallback_policy_schema["enum"] == [ + "off", + "availability", + "capacity", + "access", + "any", + ] + assert set(retry_object_schema["properties"]) == {"max_retries", "delay_ms"} + assert "chat_template_kwargs" in prompt_properties["llm_config"]["properties"] assert by_key["llm"]["properties"]["model"]["x-ag-type-ref"] == "model" + assert "chat_template_kwargs" in by_key["llm"]["properties"] diff --git a/api/run-tests.py b/api/run-tests.py index dd0b468fe0..33b3988d4d 100644 --- a/api/run-tests.py +++ b/api/run-tests.py @@ -7,6 +7,10 @@ from dotenv import load_dotenv +ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +LOCAL_SDK_DIR = os.path.join(ROOT_DIR, "sdk") + + TYPES = { "license": ["ee", "oss"], "coverage": ["smoke", "full"], @@ -31,6 +35,14 @@ def _resolve_license() -> str: return "ee" if 
os.getenv("AGENTA_LICENSE") == "ee" else "oss" +def _prepend_pythonpath(path: str) -> None: + current = os.environ.get("PYTHONPATH") + paths = [path] + if current: + paths.append(current) + os.environ["PYTHONPATH"] = os.pathsep.join(paths) + + @click.command() @click.option( "--env-file", @@ -143,6 +155,9 @@ def run_tests( license = _resolve_license() click.echo(f"AGENTA_LICENSE={license}") + if os.path.isdir(LOCAL_SDK_DIR): + _prepend_pythonpath(LOCAL_SDK_DIR) + # Set optional dimensions for name, value in [ ("COVERAGE", coverage), diff --git a/docs/designs/extend-prompt-templates/findings.md b/docs/designs/extend-prompt-templates/findings.md new file mode 100644 index 0000000000..b3b26bba12 --- /dev/null +++ b/docs/designs/extend-prompt-templates/findings.md @@ -0,0 +1,122 @@ +# Extend Prompt Templates Findings + +Scan scope: `973e80146..9420b8779` on `feat/extend-prompt-templates` + +Active path: `docs/designs/extend-prompt-templates` + +Sources reviewed: + +- `docs/designs/extend-prompt-templates/{gap,initial.specs,plan,proposal,research}.md` +- `sdk/agenta/sdk/utils/types.py` +- `sdk/agenta/sdk/engines/running/handlers.py` +- `sdk/agenta/sdk/engines/running/interfaces.py` +- `api/oss/src/resources/workflows/catalog.py` +- `api/pyproject.toml` +- `web/packages/agenta-entities/src/shared/execution/requestBodyBuilder.ts` +- `web/packages/agenta-entity-ui/src/DrillInView/components/PlaygroundConfigSection.tsx` +- `web/packages/agenta-entity-ui/src/DrillInView/SchemaControls/PromptSchemaControl.tsx` +- `web/oss/src/components/Playground/Components/Modals/RefinePromptModal/hooks/useRefinePrompt.ts` + +Verification run: + +- `pytest -q sdk/oss/tests/pytest/unit/test_prompt_template_extensions.py api/oss/tests/pytest/unit/evaluators/test_catalog_types.py` from repo root: SDK tests passed, API import failed with `ModuleNotFoundError: No module named 'oss.src'`. 
+- `pytest -q oss/tests/pytest/unit/evaluators/test_catalog_types.py` from `api`: failed because `prompt-template` lacked `fallback_llm_configs`. +- `PYTHONPATH=/Users/junaway/Agenta/github/application/sdk:/Users/junaway/Agenta/github/application/api pytest -q oss/tests/pytest/unit/evaluators/test_catalog_types.py` from `api`: passed. +- Manual SDK repro confirmed `PromptTemplate.format()` raises when `chat_template_kwargs` contains an unresolved literal `{{...}}`. +- After fixes, `pytest -q sdk/oss/tests/pytest/unit/test_prompt_template_extensions.py`: passed, 9 tests. +- After fixes, `poetry run python run-tests.py oss/tests/pytest/unit/evaluators/test_catalog_types.py` from `api`: passed, 1 test. +- After fixes, `pnpm --filter @agenta/entity-ui build` from `web`: passed. + +## Notes + +- No whitespace errors were found by `git diff --check HEAD~2..HEAD`. +- User decision: `chat_template_kwargs` is a strict 1:1 provider pass-through field. +- User decision: prompt fallback fields are normal `data.parameters` fields and must be editable in the web registry or playground like other parameter fields. +- User decision: web tests are out of scope for this work. + +## Open Questions + +No open questions. + +## Open Findings + +### [OPEN] FPT-004: Runtime coverage is still narrower than the implementation risk + +- ID: `FPT-004` +- Origin: `scan` +- Lens: `verification` +- Severity: `P2` +- Confidence: `medium` +- Status: `open` +- Category: `Testing` +- Summary: The tests now cover SDK data-model defaults, basic fallback movement, catalog shape, and `chat_template_kwargs` 1:1 formatting behavior, but do not yet cover the full runtime fallback matrix: retry ordering, policy categories, no fallback on local errors, exhaustion behavior, and service/API smoke. 
+- Evidence: `sdk/oss/tests/pytest/unit/test_prompt_template_extensions.py` covers default/null behavior, `chat_template_kwargs` in `to_openai_kwargs()`, fallback model validation, 404 policy classification, one 503 fallback success, and unchanged `chat_template_kwargs` through `PromptTemplate.format()`. The plan still lists additional tests for retry-before-fallback, 5xx/timeout/429/401/403/400/404/422 categories, local prompt errors, final exhaustion, service completion/chat, and API catalog endpoint exposure. The user explicitly excluded web tests from this work. +- Files: + - `sdk/oss/tests/pytest/unit/test_prompt_template_extensions.py` + - `api/oss/tests/pytest/unit/evaluators/test_catalog_types.py` + - `docs/designs/extend-prompt-templates/plan.md` +- Cause: The first implementation added narrow unit coverage but did not follow the full validation matrix for prompt fallback execution. +- Explanation: The feature changes provider-call control flow. Without targeted tests around failure classification, retry boundaries, and final exhaustion, regressions can look like provider flakiness. +- Suggested Fix: Add focused SDK tests for retry/exhaustion/local-error behavior and optional service/API smoke tests. Do not add web tests in this work. +- Alternatives: Accept the remaining runtime matrix as follow-up coverage if this PR only needs the currently added unit guards. +- Sources: `docs/designs/extend-prompt-templates/plan.md`, test scan, user decision. + +## Closed Findings + +### [CLOSED] FPT-001: API catalog verification needs the local SDK setup + +- ID: `FPT-001` +- Origin: `scan` +- Lens: `verification` +- Severity: `P3` +- Confidence: `high` +- Status: `fixed` +- Category: `Testing` +- Summary: API catalog verification depended on running with the branch SDK on the import path. Without that setup, `run-tests.py` could import `api/.venv`'s installed SDK and report stale catalog contents. 
+- Evidence: The user's full `poetry run python run-tests.py` failed with `KeyError: 'fallback_llm_configs'`. A direct import check showed `agenta.sdk.utils.types` resolving to `api/.venv/lib/python3.11/site-packages/agenta/sdk/utils/types.py`. After updating `api/run-tests.py` to prepend the monorepo `sdk` directory to `PYTHONPATH` for pytest subprocesses, `poetry run python run-tests.py oss/tests/pytest/unit/evaluators/test_catalog_types.py` passed. +- Files: + - `api/run-tests.py` + - `api/oss/tests/pytest/unit/evaluators/test_catalog_types.py` +- Resolution: Fixed by making the API test runner prefer the local monorepo SDK when invoking pytest. +- Sources: User run output, focused verification run. + +### [CLOSED] FPT-002: `chat_template_kwargs` is not passed through unchanged during prompt formatting + +- ID: `FPT-002` +- Origin: `scan` +- Lens: `verification` +- Severity: `P1` +- Confidence: `high` +- Status: `fixed` +- Category: `Correctness` +- Summary: `PromptTemplate.format()` recursively substituted variables inside `llm_config.chat_template_kwargs`, violating the confirmed 1:1 provider pass-through contract. +- Evidence: The implementation called `_substitute_variables()` on `new_llm_config.chat_template_kwargs`, and a manual repro with `chat_template_kwargs={"literal": "{{provider_flag}}"}` raised `TemplateFormatError`. +- Files: + - `sdk/agenta/sdk/utils/types.py` + - `sdk/oss/tests/pytest/unit/test_prompt_template_extensions.py` +- Resolution: Fixed by excluding `chat_template_kwargs` from prompt substitution and adding a regression test that verifies primary and fallback `chat_template_kwargs` survive `PromptTemplate.format()` unchanged. +- Sources: `pytest -q sdk/oss/tests/pytest/unit/test_prompt_template_extensions.py`. 
+ +### [CLOSED] FPT-003: Fallback root fields are preserved but not editable in the prompt UI + +- ID: `FPT-003` +- Origin: `scan` +- Lens: `verification` +- Severity: `P2` +- Confidence: `high` +- Status: `fixed` +- Category: `Completeness` +- Summary: The prompt editor preserved fallback root fields but did not expose `fallback_llm_configs`, `fallback_policy`, or `retry_policy` for editing. +- Evidence: The user confirmed these fields must be editable in the web registry or playground like any other `data.parameters` field. `PromptSchemaControl` previously returned only messages, tools, response format, and template format controls. +- Files: + - `web/packages/agenta-entity-ui/src/DrillInView/SchemaControls/PromptSchemaControl.tsx` +- Resolution: Fixed by rendering prompt-root controls in `PromptSchemaControl` action-bar popovers. `Retry policy` appears to the right of `Prompt Syntax` and edits `max_retries` plus `delay_ms`; `Fallback policy` opens a popover with the fallback policy select and a list of fallback model dropdowns. +- Sources: `pnpm --filter @agenta/entity-ui build`. + +## Triage Plan + +Recommended next step: decide whether to backfill the remaining non-web runtime test matrix. + +1. If coverage is expanded in this PR, add SDK tests for retry ordering, local-error no-fallback, policy-category coverage, and exhaustion behavior. +2. If coverage is not expanded in this PR, keep `FPT-004` as a follow-up testing item. +3. Do not add web tests in this work. diff --git a/docs/designs/extend-prompt-templates/gap.md b/docs/designs/extend-prompt-templates/gap.md new file mode 100644 index 0000000000..1c913e2055 --- /dev/null +++ b/docs/designs/extend-prompt-templates/gap.md @@ -0,0 +1,127 @@ +# Fallback Models Gap + +## Summary + +The current system supports a single prompt-template `llm_config` in legacy completion/chat handlers. The proposal adds optional root-level retry and fallback controls plus ordered fallback LLM configs. 
+ +## SDK Type Gap + +Current: + +- `PromptTemplate` has `messages`, `template_format`, `input_keys`, and `llm_config`. +- `ModelConfig` is only a primary config. +- `ModelConfig` does not include `chat_template_kwargs`. +- There are no prompt-template root fields for retry or fallback. +- `to_openai_kwargs()` only serializes `self.llm_config`. + +Needed: + +- Add `fallback_llm_configs`, `retry_policy`, and `fallback_policy` to `PromptTemplate`. +- Keep every new field optional/null in stored config. +- Apply runtime behavior defaults outside the data model: + - `fallback_llm_configs: null` -> `[]` + - `retry_policy: null` -> built-in retry policy + - `fallback_policy: null` -> `off` + - `chat_template_kwargs: null` -> omitted from provider kwargs +- Reuse the current `ModelConfig` shape for fallback entries while requiring `model`. +- Add `chat_template_kwargs` to the reusable LLM config shape for primary and fallback configs. +- Add enums/models for retry and fallback policy. +- Add internal candidate-specific kwargs helpers. +- Ensure `PromptTemplate.format()` preserves and formats relevant fallback fields where needed. + +## SDK Handler Gap + +Current: + +- `completion_v0` resolves provider settings for one model, formats once, and calls once. +- `chat_v0` resolves provider settings for one model, formats once, appends messages, and calls once. +- `_call_llm_with_fallback()` exists only for `llm_v0` and uses different secret/provider behavior. +- Retry behavior exists only as fixed low-level `mockllm` recovery for closed HTTP clients. + +Needed: + +- Add a shared prompt fallback runner for `completion_v0` and `chat_v0`. +- Retry each current LLM config before considering fallback. +- Classify provider-call errors into `availability`, `capacity`, `access`, and `any`. +- Keep local prompt/input validation outside fallback. +- Resolve provider settings for each candidate via `SecretsManager.get_provider_settings_from_workflow()`. 
+- Clean up `chat_v0` input normalization. + +## Interface And Catalog Gap + +Current: + +- `single_prompt_parameters_schema()` exposes `prompt` as `x-ag-type-ref: "prompt-template"`. +- `CATALOG_TYPES` exposes `prompt-template`, `model`, `llm`, and `llms`. +- Tests assert `prompt-template.llm_config.model` has `x-ag-type-ref: "model"`. +- No catalog schema exists for prompt root fallback fields. + +Needed: + +- Update the generated/dereferenced prompt-template schema. +- Ensure `fallback_llm_configs.items` carries the full LLM config schema. +- Ensure fallback item `model` carries `x-ag-type-ref: "model"`. +- Add or update catalog tests. + +## Services Gap + +Current: + +- Completion/chat services pass `PromptTemplate` through to SDK handlers. +- Managed `llm_v0` service is separate and already has its own `llms` flow. + +Needed: + +- Mostly no explicit service code change if SDK types parse and dump correctly. +- Service smoke tests should include prompt fallback fields to catch serialization loss. + +## API Gap + +Current: + +- API catalog types are sourced from SDK `CATALOG_TYPES`. +- API does not implement special prompt fallback behavior. +- `llm_apps_service.py` only uses `x-ag-type-ref` for parameter inference. + +Needed: + +- Ensure catalog endpoint returns new prompt-template schema. +- Ensure no default-stripping or schema normalization drops non-primitive defaults incorrectly. +- Add API catalog tests for fallback fields. + +## Web Schema/UI Gap + +Current: + +- Web resolves `x-ag-type-ref: "prompt-template"` dynamically. +- Prompt controls know how to render `messages`, nested `llm_config`, tools, and response format. +- Generic array/object controls can render arrays, but fallback entry add/remove/reorder UX needs confirmation. +- Model popover only edits primary `llm_config` or `llms[0]`. +- The model-parameters panel does not currently expose `chat_template_kwargs`, requested by issue #3996. 
+- Refine prompt modal only models/extracts messages and template format, and can drop extra root fields. +- Registry/display helpers generally pick the first primary model. + +Needed: + +- Confirm or add a usable array editor for `fallback_llm_configs`. +- Make fallback item `model` render through the grouped model selector. +- Render `fallback_policy` as enum/choice. +- Render `retry_policy` as a small object or inline advanced section. +- Render `chat_template_kwargs` as a model parameter object field and preserve it unchanged. +- Preserve fallback fields in prompt refine flows and execution payload building. +- Optionally show fallback summary in registry/playground headers. + +## Test Gap + +Current: + +- Tests cover prompt-template catalog exposure, interface references, and basic storage roundtrip. +- No tests cover fallback config storage, schema hints, handler fallback behavior, or web persistence. + +Needed: + +- SDK unit tests for Pydantic parsing/dumping and candidate construction. +- SDK unit tests proving new data-model defaults are null while runtime defaults are normalized separately. +- SDK handler tests for retry, fallback policy acceptance/rejection, and exhaustion. +- API catalog tests for new schema fields and x-ag metadata. +- Web tests for editing/preserving fallback fields. diff --git a/docs/designs/extend-prompt-templates/initial.specs.md b/docs/designs/extend-prompt-templates/initial.specs.md new file mode 100644 index 0000000000..e63936d958 --- /dev/null +++ b/docs/designs/extend-prompt-templates/initial.specs.md @@ -0,0 +1,169 @@ +# Fallback Models Initial Specs + +## Prompt Template Shape + +Add fallback and retry behavior as optional root fields on `PromptTemplate`, not inside `llm_config`. 
+ +```python +class PromptTemplate(BaseModel): + messages: Messages + template_format: Literal["curly", "fstring", "jinja2"] = "curly" + input_keys: list[str] | None = None + + llm_config: LLMConfig + fallback_llm_configs: list[LLMConfig] | None = None + retry_policy: RetryPolicy | None = None + fallback_policy: FallbackPolicy | None = None +``` + +## Field Semantics + +Rule for new fields: + +```text +data model default: null +runtime behavior default: field-specific built-in behavior +``` + +```text +llm_config + Primary LLM config. + +fallback_llm_configs + Ordered fallback LLM configs. + Same shape as llm_config. + Optional/null in stored config. + Runtime default: []. + +retry_policy + Applies to each attempted LLM config: + primary and every fallback. + Optional/null in stored config. + Runtime default: built-in retry policy. + +fallback_policy + Decides whether the final error for one LLM config can move execution + to the next fallback LLM config. + Optional/null in stored config. + Runtime default: off. +``` + +New field defaults: + +```text +fallback_llm_configs + data model: null + runtime: [] + +retry_policy + data model: null + runtime: built-in retry policy + +fallback_policy + data model: null + runtime: off + +chat_template_kwargs + data model: null + runtime: omit from provider request +``` + +## LLM Config Shape + +Fallback configs use the same schema as the primary config. 
+ +```python +class LLMConfig(BaseModel): + model: str + temperature: float | None = None + max_tokens: int | None = None + top_p: float | None = None + frequency_penalty: float | None = None + presence_penalty: float | None = None + reasoning_effort: Literal["none", "low", "medium", "high"] | None = None + chat_template_kwargs: dict | None = None + response_format: ResponseFormat | None = None + stream: bool | None = None + tools: list[dict] | None = None + tool_choice: Literal["none", "auto"] | dict | None = None +``` + +For `fallback_llm_configs` items, `model` is required and all other fields are optional. + +## JSON Schema Hints + +```json +{ + "fallback_llm_configs": { + "default": null, + "anyOf": [ + { + "type": "array", + "x-ag-type-ref": "llm-configs", + "items": { + "type": "object", + "x-ag-type-ref": "llm-config", + "properties": { + "model": { + "type": "string", + "x-ag-type-ref": "model" + } + }, + "required": ["model"] + } + }, + { "type": "null" } + ] + } +} +``` + +Use the same schema-default rule for `retry_policy`, `fallback_policy`, and `chat_template_kwargs`: nullable, with `default: null`. 
+ +## Fallback Policy + +```text +off + no fallback + +availability + network errors, timeout, 5xx, 503 + +capacity + availability + 429/rate limit/overload + +access + capacity + 401/403 + +any + access + 400/404/422 provider-call errors +``` + +Local prompt/template errors do not fallback: + +```text +missing input key +invalid prompt template +malformed messages before provider call +local schema/config validation error +``` + +## Runtime Loop + +```text +llm_configs = [llm_config, *fallback_llm_configs] + +for current_llm_config in llm_configs: + run current_llm_config with retry_policy + + if success: + return response + + if max_retries: + if fallback_policy allows fallback given final error: + continue to next current_llm_config + + fail with final error + +fail with last error +``` diff --git a/docs/designs/extend-prompt-templates/plan.md b/docs/designs/extend-prompt-templates/plan.md new file mode 100644 index 0000000000..13fd43e460 --- /dev/null +++ b/docs/designs/extend-prompt-templates/plan.md @@ -0,0 +1,160 @@ +# Fallback Models Plan + +## Task 1: SDK Contract + +1. Add optional `RetryPolicy`, `FallbackPolicy`, and prompt fallback fields to SDK types. +2. Keep `ModelConfig` as the reusable LLM config shape. +3. Add `chat_template_kwargs` to `ModelConfig`. +4. Ensure every new field defaults to `null` in the data model: + - `fallback_llm_configs` + - `retry_policy` + - `fallback_policy` + - `chat_template_kwargs` +5. Normalize behavior internally at runtime: + - `fallback_llm_configs: null` -> `[]` + - `retry_policy: null` -> built-in retry policy + - `fallback_policy: null` -> `off` + - `chat_template_kwargs: null` -> omit from provider kwargs +6. Keep `to_openai_kwargs()` primary-only. +7. 
Add internal helpers for: + - `[llm_config, *fallback_llm_configs]` + - candidate-specific OpenAI/LiteLLM kwargs + - retry policy defaults + +Tests: + +- prompt-template parses old JSON +- prompt-template parses new JSON +- new fields dump as null/omitted in stored data unless explicitly set +- omitted/null new fields normalize to runtime behavior defaults +- `chat_template_kwargs` dumps into LLM kwargs unchanged +- null `chat_template_kwargs` is omitted from LLM kwargs +- fallback item requires `model` +- dump with `exclude_none=True` preserves explicit fallback fields + +## Task 2: Catalog Schema + +1. Add schema metadata for fallback fields through `PromptTemplate.model_json_schema()`. +2. Ensure fallback item `model` keeps `x-ag-type-ref: "model"`. +3. Ensure `chat_template_kwargs` appears in primary and fallback LLM config schemas. +4. Add semantic catalog keys if needed: + - `llm-config` + - `llm-configs` + - `retry-policy` + - `fallback-policy` +5. Do not encode runtime defaults as data-model schema defaults. +6. Let frontend materialize UI/runtime defaults client-side when needed. + +Tests: + +- catalog type includes prompt-template fallback root fields +- `fallback_llm_configs.items.properties.model["x-ag-type-ref"] == "model"` +- catalog type includes `chat_template_kwargs` +- existing prompt-template interface tests still pass + +## Task 3: SDK Runtime + +1. Add error classifier: + - `availability`: network, timeout, 5xx, 503 + - `capacity`: availability + 429/rate-limit/overload + - `access`: capacity + 401/403 + - `any`: access + 400/404/422 provider-call errors +2. Add `should_fallback(error, fallback_policy)`. +3. Add `run_llm_config_with_retry_policy()`. +4. 
Add prompt fallback runner: + +```text +llm_configs = [llm_config, *fallback_llm_configs] + +for current_llm_config in llm_configs: + run current_llm_config with retry_policy + + if success: + return response + + if max_retries: + if fallback_policy allows fallback given final error: + continue to next current_llm_config + + fail with final error + +fail with last error +``` + +5. Use the shared runner from `completion_v0`. +6. Use the shared runner from `chat_v0`. +7. Normalize `chat_v0` inputs before mutating them. +8. Preserve `_apply_responses_bridge_if_needed()` per candidate. +9. Resolve provider settings per candidate through `SecretsManager.get_provider_settings_from_workflow()`. + +Tests: + +- primary success does not touch fallbacks +- retry happens before fallback +- `availability` fallback handles 5xx/timeout +- `capacity` fallback handles 429 +- `access` fallback handles 401/403 +- `any` fallback handles 400/404/422 provider-call errors +- local prompt formatting errors do not fallback +- all candidates exhausted raises final/aggregate unavailable error + +## Task 4: Services And API + +1. Verify services parse and dump new `PromptTemplate` fields unchanged. +2. Add service smoke fixture with fallback fields. +3. Verify API catalog endpoint returns updated schemas. +4. Verify catalog default normalization does not drop required schema information. + +Tests: + +- service completion/chat accepts fallback prompt config +- API catalog type endpoint exposes fallback fields + +## Task 5: Web Schema And Editing + +1. Verify `x-ag-type-ref: "prompt-template"` dereferencing includes fallback fields. +2. Verify generic array/object controls can edit `fallback_llm_configs`. +3. If generic controls are not enough, add a compact prompt fallback editor: + - add fallback config + - remove fallback config + - reorder fallback config + - model selector per fallback config +4. Render `fallback_policy` as enum/choice. +5. 
Render `retry_policy` as a small inline object or advanced section. +6. Expose `chat_template_kwargs` in the model-parameters panel for issue #3996. +7. Keep primary model popover focused on `llm_config`; do not overload it with fallback editing unless needed. + +Tests: + +- fallback policy can be edited and committed +- fallback model can be added and committed +- fallback model `model` uses grouped model options +- `chat_template_kwargs` can be edited and is sent unchanged +- fallback fields survive reload + +## Task 6: Web Preservation Paths + +1. Preserve fallback root fields in request payload building. +2. Preserve fallback root fields in prompt refinement. +3. Preserve fallback root fields in gateway tools prompt updates. +4. Update model display helpers only if product wants fallback model summaries. + +Tests: + +- execution request includes fallback root fields +- refine prompt keeps fallback fields when accepting refined messages +- registry still shows primary model + +## Task 7: Documentation And Migration + +1. Update SDK docs/examples for prompt-template fallback. +2. Add one JSON example and one Python example. +3. Document `chat_template_kwargs` pass-through for Granite/Qwen-style thinking controls. +4. Document fallback policy categories. +5. Document that local prompt/input errors do not fallback. +6. Document that all new fields default to `null` in stored data. +7. 
Document runtime default behavior: + - `fallback_llm_configs: null` -> no fallback models + - `fallback_policy: null` -> `off` + - `retry_policy: null` -> built-in retry policy + - `chat_template_kwargs: null` -> omitted from provider kwargs diff --git a/docs/designs/extend-prompt-templates/proposal.md b/docs/designs/extend-prompt-templates/proposal.md new file mode 100644 index 0000000000..947ef99943 --- /dev/null +++ b/docs/designs/extend-prompt-templates/proposal.md @@ -0,0 +1,252 @@ +# Fallback Models Proposal + +## Goal + +Extend prompt templates without changing the existing meaning of `llm_config`. + +Also include [GitHub issue #3996](https://github.com/Agenta-AI/agenta/issues/3996): `chat_template_kwargs` should be a model parameter exposed in the playground and passed through unchanged to the provider request. + +All new fields default to `null` in the data model. Runtime applies field-specific built-in behavior. + +Root fields on `PromptTemplate`: + +```python +class PromptTemplate(BaseModel): + messages: Messages + template_format: Literal["curly", "fstring", "jinja2"] = "curly" + input_keys: list[str] | None = None + + llm_config: LLMConfig + fallback_llm_configs: list[LLMConfig] | None = None + retry_policy: RetryPolicy | None = None + fallback_policy: FallbackPolicy | None = None +``` + +## Semantics + +Rule for new fields: + +```text +data model default: null +runtime behavior default: field-specific built-in behavior +``` + +```text +llm_config + Primary LLM config. + +fallback_llm_configs + Ordered fallback LLM configs. + Same shape as llm_config. + Optional/null in stored config. + Runtime default: []. + +retry_policy + Applies to each attempted LLM config: + primary and every fallback. + Optional/null in stored config. + Runtime default: built-in retry policy. + +fallback_policy + Decides whether the final error for one LLM config can move execution + to the next fallback LLM config. + Optional/null in stored config. + Runtime default: off. 
+``` + +New field defaults: + +```text +fallback_llm_configs + data model: null + runtime: [] + +retry_policy + data model: null + runtime: built-in retry policy + +fallback_policy + data model: null + runtime: off + +chat_template_kwargs + data model: null + runtime: omit from provider request +``` + +## LLM Config + +Use one reusable LLM config shape for primary and fallback entries. + +```python +class LLMConfig(BaseModel): + model: str = Field(..., json_schema_extra={"x-ag-type-ref": "model"}) + temperature: float | None = None + max_tokens: int | None = None + top_p: float | None = None + frequency_penalty: float | None = None + presence_penalty: float | None = None + reasoning_effort: Literal["none", "low", "medium", "high"] | None = None + chat_template_kwargs: dict | None = None + response_format: ResponseFormat | None = None + stream: bool | None = None + tools: list[dict] | None = None + tool_choice: Literal["none", "auto"] | dict | None = None +``` + +`fallback_llm_configs` item rules: + +- `model` is required. +- Every other field is optional. +- No retry/fallback fields are nested inside `LLMConfig`. +- `chat_template_kwargs` is part of `LLMConfig`, so it applies to both primary and fallback configs. + +## Retry Policy + +Keep this minimal and explicit. + +```python +class RetryPolicy(BaseModel): + max_retries: int = 0 + delay_ms: int = 0 +``` + +The runtime may extend delay strategy later, but the initial contract should stay small. 
+ +## Fallback Policy + +```text +off + no fallback + +availability + network errors, timeout, 5xx, 503 + +capacity + availability + 429/rate limit/overload + +access + capacity + 401/403 + +any + access + 400/404/422 provider-call errors +``` + +Never fallback on local errors: + +```text +missing input key +invalid prompt template +malformed messages before provider call +local schema/config validation error +``` + +## Runtime Loop + +```text +llm_configs = [llm_config, *fallback_llm_configs] + +for current_llm_config in llm_configs: + run current_llm_config with retry_policy + + if success: + return response + + if max_retries: + if fallback_policy allows fallback given final error: + continue to next current_llm_config + + fail with final error + +fail with last error +``` + +## SDK Runtime Changes + +Add shared prompt-template helpers near `completion_v0` and `chat_v0`: + +- build candidates from `prompt.llm_config` and `prompt.fallback_llm_configs` +- run one candidate with `prompt.retry_policy` +- classify final errors +- evaluate `prompt.fallback_policy` +- resolve provider settings per candidate through `SecretsManager.get_provider_settings_from_workflow()` +- preserve `_apply_responses_bridge_if_needed()` per candidate + +Keep `PromptTemplate.to_openai_kwargs()` primary-only for compatibility. Add an internal helper for candidate-specific kwargs instead of changing that method's return contract. + +## Catalog And X-Ag Schema + +Expose the new fields through the existing `prompt-template` catalog type. 
+ +Expected schema behavior: + +```json +{ + "fallback_llm_configs": { + "default": null, + "anyOf": [ + { + "type": "array", + "x-ag-type-ref": "llm-configs", + "items": { + "type": "object", + "x-ag-type-ref": "llm-config", + "properties": { + "model": { + "type": "string", + "x-ag-type-ref": "model" + } + }, + "required": ["model"] + } + }, + { "type": "null" } + ] + } +} +``` + +Use the same schema-default rule for `retry_policy`, `fallback_policy`, and `chat_template_kwargs`: nullable, with `default: null`. + +Add semantic catalog keys if useful for frontend rendering: + +- `llm-config` +- `llm-configs` +- `retry-policy` +- `fallback-policy` + +The minimum requirement is that `fallback_llm_configs.items.properties.model` keeps `x-ag-type-ref: "model"` so the frontend can reuse the grouped model selector. + +## Services + +Service wrappers should remain thin: + +- keep `CompletionConfig.prompt: PromptTemplate` +- keep `ChatConfig.prompt: PromptTemplate` +- rely on SDK model parsing and `model_dump()` + +No service-specific fallback policy should be introduced. + +## Web + +Schema-driven rendering should handle most fields automatically if catalog schemas are correct. + +Required web follow-up: + +- make sure `fallback_llm_configs` can be added, removed, and reordered in the prompt editor/drill-in UI +- make sure each fallback entry renders `model` with the grouped model selector +- expose `chat_template_kwargs` in model parameters and pass it through 1:1 +- make sure `retry_policy` and `fallback_policy` appear as prompt root fields, not under `llm_config` +- preserve fallback fields through prompt extraction, execution payload building, and commit flows +- update model display helpers only if we want to show fallback summary in registry/playground headers +- update refine prompt modal so it preserves unknown prompt root fields, or explicitly includes fallback fields + +## Backward Compatibility + +- Existing prompt templates remain valid. 
+- Existing `llm_config.model` remains the primary model. +- Existing `to_openai_kwargs()` callers keep getting the primary config only. +- Omitted/null `fallback_llm_configs` means runtime `[]`. +- Omitted/null `retry_policy` means runtime built-in retry defaults. +- Omitted/null `fallback_policy` means runtime `off`. +- Omitted/null `chat_template_kwargs` means no `chat_template_kwargs` key is sent. diff --git a/docs/designs/extend-prompt-templates/research.md b/docs/designs/extend-prompt-templates/research.md new file mode 100644 index 0000000000..0dfa3a6cfe --- /dev/null +++ b/docs/designs/extend-prompt-templates/research.md @@ -0,0 +1,182 @@ +# Fallback Models Research + +## Scope + +This captures the current code paths affected by prompt-template fallback models: + +- SDK prompt-template types and catalog schemas +- SDK runtime handlers +- managed services wrapping SDK handlers +- API catalog type exposure +- web playground/schema controls and registry consumers +- related model-parameter request in GitHub issue #3996 + +## Related Issue: #3996 + +[GitHub issue #3996](https://github.com/Agenta-AI/agenta/issues/3996) requests `chat_template_kwargs` in the playground `Model Parameters` panel. + +- Use case: some reasoning-capable models do not support `reasoning_effort` or soft prompt switches. +- Examples from the issue: IBM Granite uses `{"thinking": true/false}` and Qwen3 uses `{"enable_thinking": true/false}`. +- Desired behavior: expose an input in model parameters and pass `chat_template_kwargs` 1:1 to the API request. +- Current code search found no existing `chat_template_kwargs` implementation in the SDK/API/services/web paths checked. + +## SDK Types + +Current prompt-template types live in `application/sdk/agenta/sdk/utils/types.py`. + +- `ModelConfig` is the primary LLM config shape. It has `model`, temperature, token, sampling, response format, stream, tools, and tool choice fields. `model` has `json_schema_extra={"x-ag-type-ref": "model"}`. 
It does not have `chat_template_kwargs`. +- `PromptTemplate` currently has only `messages`, `template_format`, `input_keys`, and `llm_config`. +- `PromptTemplate.format()` formats messages and `llm_config.response_format`, then returns a new `PromptTemplate`. +- `PromptTemplate.to_openai_kwargs()` emits one OpenAI/LiteLLM payload from `self.llm_config`. +- `AgLLM` and `AgLLMs` are separate catalog semantic types for the newer `llm_v0` interface. They are not currently used by legacy `PromptTemplate`. +- `CATALOG_TYPES` exposes `"model"`, `"llm"`, `"llms"`, and `"prompt-template"` schemas for the API catalog. + +Relevant files: + +- `application/sdk/agenta/sdk/utils/types.py` +- `application/api/oss/tests/pytest/unit/evaluators/test_catalog_types.py` +- `application/sdk/oss/tests/pytest/acceptance/integrations/test_prompt_template_storage.py` + +## SDK Interfaces + +Legacy completion/chat interfaces use `single_prompt_parameters_schema()`. + +- The schema contains a top-level `prompt` property with `x-ag-type-ref: "prompt-template"`. +- Defaults currently include `messages`, `template_format`, `input_keys`, and `llm_config: {"model": "gpt-4o-mini"}`. +- Interface tests assert that `prompt` is a semantic reference, not inline `x-ag-type`. + +The newer `llm_v0` interface is separate. + +- It stores model attempts in `parameters.llms`. +- It already describes an ordered list of LLM configs. +- It does not expose prompt-template root fields like `fallback_llm_configs`, `retry_policy`, or `fallback_policy`. + +Relevant files: + +- `application/sdk/agenta/sdk/engines/running/interfaces.py` +- `application/api/oss/tests/pytest/unit/workflows/test_builtin_llm_interfaces.py` + +## SDK Runtime + +Legacy handlers: + +- `completion_v0` validates `parameters.prompt`, loads `SinglePromptConfig`, resolves secrets for `config.prompt.llm_config.model`, formats the prompt, applies the OpenAI Responses bridge if tools require it, and calls `mockllm.acompletion()` once. 
+- `chat_v0` follows the same single-model path and appends runtime chat messages before calling `mockllm.acompletion()`. +- There is no prompt-template fallback loop and no user-configurable retry policy. +- `chat_v0` calls `inputs.pop("messages", None)` before normalizing `inputs`, so this path should be cleaned up while touching the handler. + +Newer `llm_v0` handler: + +- `_call_llm_with_fallback()` already iterates `parameters.llms`. +- It falls back on a hardcoded LiteLLM exception tuple: auth, rate limit, service unavailable, and not found. +- It fetches provider keys directly via `retrieve_secrets()` and sets LiteLLM globals. +- It does not use `SecretsManager.get_provider_settings_from_workflow()`, so it does not exactly match legacy prompt custom-provider behavior. + +Other runtime pieces: + +- `mockllm.acompletion()` has an internal fixed retry for closed HTTP clients and Azure API connection errors, but this is not user policy. +- `InvalidSecretsV0Error`, `PromptCompletionV0Error`, and `LLMUnavailableV0Error` already exist. + +Relevant files: + +- `application/sdk/agenta/sdk/engines/running/handlers.py` +- `application/sdk/agenta/sdk/litellm/mockllm.py` +- `application/sdk/agenta/sdk/managers/secrets.py` +- `application/sdk/agenta/sdk/engines/running/errors.py` + +## Services + +The service package wraps SDK handlers. + +- `services/oss/src/completion.py` defines `CompletionConfig.prompt: PromptTemplate`, dumps it to JSON, and calls `completion_v0`. +- `services/oss/src/chat.py` defines `ChatConfig.prompt: PromptTemplate`, dumps it to JSON, and calls `chat_v0`. +- `services/oss/src/managed.py` exposes `llm_v0` separately as `agenta:builtin:llm:v0`. + +These wrappers should pick up new SDK `PromptTemplate` fields automatically if they are part of the Pydantic model and preserved by `model_dump(exclude_none=True)`. 
+ +Relevant files: + +- `application/services/oss/src/completion.py` +- `application/services/oss/src/chat.py` +- `application/services/oss/src/managed.py` + +## API Catalog + +The API exposes SDK catalog types directly. + +- `get_workflow_catalog_types()` returns `CATALOG_TYPES`. +- `get_workflow_catalog_type()` returns one dereferenced schema by key. +- The web uses this to resolve `x-ag-type-ref: "prompt-template"`. +- `llm_apps_service.py` infers legacy config parameters and detects messages from `x-ag-type-ref` in schemas. + +Relevant files: + +- `application/api/oss/src/resources/workflows/catalog.py` +- `application/api/oss/src/services/llm_apps_service.py` + +## Web Schema Consumption + +The web mostly renders this through schema metadata. + +- `fetchAgTypeSchema("prompt-template")` fetches the dereferenced schema from `/workflows/catalog/types/{agType}`. +- `agTypeSchemaAtomFamily` caches those schemas. +- `parametersSchemaAtomFamily` recursively enriches `x-ag-type-ref` nodes with the fetched catalog type schema. +- `SchemaPropertyRenderer` prioritizes `x-ag-type-ref`, then `x-ag-type`, then legacy `x-parameter`. +- `PromptSchemaControl.isPromptSchema()` treats `x-ag-type-ref: "prompt-template"` as a prompt. +- `schemaUtils.getLLMConfigSchema()` finds nested `llm_config`, `llmConfig`, or canonical `llms[0]`. +- `schemaUtils.getLLMConfigProperties()` renders additional LLM config fields as advanced parameters. +- `PlaygroundConfigSection` has a model-parameters popover that updates `prompt.llm_config` or `llms[0]`. +- Issue #3996 specifically targets this model-parameters panel for `chat_template_kwargs`. 
+ +Relevant files: + +- `application/web/packages/agenta-entities/src/workflow/api/api.ts` +- `application/web/packages/agenta-entities/src/workflow/state/store.ts` +- `application/web/packages/agenta-entities/src/workflow/state/molecule.ts` +- `application/web/packages/agenta-entity-ui/src/DrillInView/SchemaControls/SchemaPropertyRenderer.tsx` +- `application/web/packages/agenta-entity-ui/src/DrillInView/SchemaControls/PromptSchemaControl.tsx` +- `application/web/packages/agenta-entity-ui/src/DrillInView/SchemaControls/schemaUtils.ts` +- `application/web/packages/agenta-entity-ui/src/DrillInView/components/PlaygroundConfigSection.tsx` + +## Web Request Payload + +Playground execution sends stored prompt config through `ag_config`. + +- `requestBodyBuilder` preserves raw server config shape, overlays edited prompt configs, strips legacy `system_prompt` and `user_prompt`, sanitizes `llm_config.response_format`, strips Agenta metadata, and writes `data.ag_config`. +- For workflow invoke payloads, `executionItems.ts` maps legacy request pieces into `data.inputs` and `data.parameters`. +- Since fallback fields are root prompt fields, they must survive prompt extraction, merge, sanitization, and metadata stripping. + +Relevant files: + +- `application/web/packages/agenta-entities/src/shared/execution/requestBodyBuilder.ts` +- `application/web/packages/agenta-playground/src/state/execution/executionItems.ts` +- `application/web/packages/agenta-entities/src/runnable/utils.ts` +- `application/web/packages/agenta-entities/src/workflow/state/runnableSetup.ts` + +## Web Registry And Prompt Utilities + +Registry/prompt utility code reads the primary model only today. + +- `registryStore.pickModelFromParams()` recursively finds direct model fields or nested `llm_config.model`. +- `executionItems.getPromptModel()` reads `prompt.llm_config.model` for display/trace context. +- Gateway tools helpers preserve `llm_config` versus `llmConfig` paths. 
- Refine prompt modal only models `messages`, `template_format`, `input_keys`, and `llm_config`; it drops any fallback fields when extracting a prompt. +- Variable extraction looks at prompt messages plus `llm_config.response_format` and `llm_config.tools`. + +Relevant files: + +- `application/web/oss/src/components/VariantsComponents/store/registryStore.ts` +- `application/web/packages/agenta-playground/src/state/execution/executionItems.ts` +- `application/web/oss/src/features/gateway-tools/prompt/atoms.ts` +- `application/web/oss/src/components/Playground/Components/Modals/RefinePromptModal/types.ts` +- `application/web/oss/src/components/Playground/Components/Modals/RefinePromptModal/hooks/useRefinePrompt.ts` +- `application/web/packages/agenta-shared/src/utils/chatPrompts.ts` + +## Current Tests To Update Or Add + +- SDK prompt-template storage roundtrip should include `retry_policy`, `fallback_policy`, and `fallback_llm_configs`. +- SDK/API/web tests should include `chat_template_kwargs` as a normal LLM config field. +- Catalog type tests should assert new prompt-template schema fields and `fallback_llm_configs.items.properties.model["x-ag-type-ref"] == "model"`. +- Built-in interface tests should preserve `x-ag-type-ref: "prompt-template"` and defaults. +- Handler tests should cover primary success, retry before fallback, fallback by policy, policy rejection, and final exhaustion. +- Web tests should cover schema rendering/persistence of fallback root fields and the model-selector metadata for fallback models.
diff --git a/sdk/agenta/sdk/engines/running/handlers.py b/sdk/agenta/sdk/engines/running/handlers.py index 71a79b8429..ac50f8f2be 100644 --- a/sdk/agenta/sdk/engines/running/handlers.py +++ b/sdk/agenta/sdk/engines/running/handlers.py @@ -1,3 +1,4 @@ +import asyncio import json import math import os @@ -24,7 +25,14 @@ ) from agenta.sdk.litellm import mockllm -from agenta.sdk.utils.types import PromptTemplate, Message, Messages # noqa: F401 +from agenta.sdk.utils.types import ( # noqa: F401 + FallbackPolicy, + Message, + Messages, + ModelConfig, + PromptTemplate, + RetryPolicy, +) from agenta.sdk.managers.secrets import SecretsManager from agenta.sdk.decorators.tracing import instrument from agenta.sdk.models.shared import Data @@ -1884,7 +1892,9 @@ class SinglePromptConfig(BaseModel): def _apply_responses_bridge_if_needed( - formatted_prompt: PromptTemplate, provider_settings: Dict + formatted_prompt: PromptTemplate, + provider_settings: Dict, + llm_config: Optional[ModelConfig] = None, ) -> Dict: """ Checks if web_search_preview tool is present and applies responses bridge if needed. 
@@ -1900,7 +1910,7 @@ def _apply_responses_bridge_if_needed( Returns: The provider_settings dictionary, potentially modified to use responses bridge """ - tools = formatted_prompt.llm_config.tools + tools = (llm_config or formatted_prompt.llm_config).tools if tools: for tool in tools: if isinstance(tool, dict) and tool.get("type") in [ @@ -1914,6 +1924,181 @@ def _apply_responses_bridge_if_needed( return provider_settings +def _normalize_retry_policy(retry_policy: Optional[RetryPolicy]) -> RetryPolicy: + return retry_policy or RetryPolicy() + + +def _normalize_fallback_policy( + fallback_policy: Optional[FallbackPolicy], +) -> FallbackPolicy: + return fallback_policy or FallbackPolicy.OFF + + +def _prompt_llm_configs(prompt: PromptTemplate) -> List[ModelConfig]: + return [prompt.llm_config, *(prompt.fallback_llm_configs or [])] + + +def _error_status_code(error: Exception) -> Optional[int]: + status_code = getattr(error, "status_code", None) + if isinstance(status_code, int): + return status_code + + response = getattr(error, "response", None) + status_code = getattr(response, "status_code", None) + if isinstance(status_code, int): + return status_code + + return None + + +def _classify_prompt_fallback_error(error: Exception) -> Optional[str]: + if isinstance(error, InvalidSecretsV0Error): + return "access" + + if isinstance(error, (TimeoutError, httpx.TimeoutException)): + return "availability" + + if isinstance(error, httpx.RequestError): + return "availability" + + status_code = _error_status_code(error) + if status_code in (401, 403): + return "access" + if status_code == 429: + return "capacity" + if status_code == 503 or (status_code is not None and 500 <= status_code <= 599): + return "availability" + if status_code in (400, 404, 422): + return "any" + + error_text = f"{type(error).__name__} {str(error)}".lower() + if any( + marker in error_text + for marker in ( + "timeout", + "timed out", + "network", + "connection", + "connect", + "unavailable", + ) + ): 
+ return "availability" + if any(marker in error_text for marker in ("rate limit", "ratelimit", "overload")): + return "capacity" + if any( + marker in error_text + for marker in ("auth", "unauthorized", "forbidden", "permission", "api key") + ): + return "access" + if any( + marker in error_text + for marker in ( + "badrequest", + "bad request", + "notfound", + "not found", + "validation", + ) + ): + return "any" + + return None + + +def _should_fallback( + error: Exception, fallback_policy: Optional[FallbackPolicy] +) -> bool: + policy = _normalize_fallback_policy(fallback_policy) + if policy == FallbackPolicy.OFF: + return False + + category = _classify_prompt_fallback_error(error) + if category is None: + return False + + allowed_categories = { + FallbackPolicy.AVAILABILITY: {"availability"}, + FallbackPolicy.CAPACITY: {"availability", "capacity"}, + FallbackPolicy.ACCESS: {"availability", "capacity", "access"}, + FallbackPolicy.ANY: {"availability", "capacity", "access", "any"}, + } + return category in allowed_categories.get(policy, set()) + + +async def _run_prompt_llm_config_with_retry( + formatted_prompt: PromptTemplate, + llm_config: ModelConfig, + retry_policy: Optional[RetryPolicy], + messages: Optional[List[Message]] = None, +): + policy = _normalize_retry_policy(retry_policy) + attempts = policy.max_retries + 1 + last_error = None + + for attempt in range(attempts): + try: + provider_settings = SecretsManager.get_provider_settings_from_workflow( + llm_config.model + ) + + if not provider_settings: + raise InvalidSecretsV0Error( + expected="dict", got=provider_settings, model=llm_config.model + ) + + provider_settings = _apply_responses_bridge_if_needed( + formatted_prompt, + dict(provider_settings), + llm_config=llm_config, + ) + openai_kwargs = formatted_prompt.to_openai_kwargs_for_llm_config(llm_config) + + if messages is not None: + openai_kwargs["messages"].extend(messages) + + with mockllm.user_aws_credentials_from(provider_settings): + return 
await mockllm.acompletion( + **{k: v for k, v in openai_kwargs.items() if k != "model"}, + **provider_settings, + ) + except Exception as exc: + last_error = exc + if attempt >= attempts - 1: + break + if policy.delay_ms > 0: + await asyncio.sleep(policy.delay_ms / 1000) + + raise last_error # type: ignore[misc] + + +async def _run_prompt_with_fallback( + formatted_prompt: PromptTemplate, + messages: Optional[List[Message]] = None, +): + llm_configs = _prompt_llm_configs(formatted_prompt) + last_error = None + + for index, current_llm_config in enumerate(llm_configs): + try: + return await _run_prompt_llm_config_with_retry( + formatted_prompt=formatted_prompt, + llm_config=current_llm_config, + retry_policy=formatted_prompt.retry_policy, + messages=messages, + ) + except Exception as exc: + last_error = exc + has_next_config = index < len(llm_configs) - 1 + if has_next_config and _should_fallback( + exc, formatted_prompt.fallback_policy + ): + continue + raise + + raise last_error # type: ignore[misc] + + @instrument(ignore_inputs=["parameters"]) async def completion_v0( parameters: Data, @@ -1938,33 +2123,9 @@ async def completion_v0( got=sorted(provided_keys), ) - await SecretsManager.ensure_secrets_in_workflow() - - provider_settings = SecretsManager.get_provider_settings_from_workflow( - config.prompt.llm_config.model - ) - - if not provider_settings: - model = getattr( - getattr(getattr(config, "prompt", None), "llm_config", None), "model", None - ) - raise InvalidSecretsV0Error(expected="dict", got=provider_settings, model=model) - formatted_prompt = config.prompt.format(**inputs) - - provider_settings = _apply_responses_bridge_if_needed( - formatted_prompt, provider_settings - ) - - with mockllm.user_aws_credentials_from(provider_settings): - response = await mockllm.acompletion( - **{ - k: v - for k, v in formatted_prompt.to_openai_kwargs().items() - if k != "model" - }, - **provider_settings, - ) + await SecretsManager.ensure_secrets_in_workflow() + 
response = await _run_prompt_with_fallback(formatted_prompt) message = response.choices[0].message # type: ignore @@ -1984,6 +2145,8 @@ async def chat_v0( inputs: Optional[Dict[str, str]] = None, messages: Optional[List[Message]] = None, ): + has_inputs = inputs is not None + inputs = dict(inputs or {}) # This prevents a mismatch in `required_keys != provided_keys`` inputs.pop("messages", None) @@ -2000,38 +2163,13 @@ async def chat_v0( got=sorted(provided_keys), ) - if inputs is not None: + if has_inputs: formatted_prompt = config.prompt.format(**inputs) else: formatted_prompt = config.prompt - openai_kwargs = formatted_prompt.to_openai_kwargs() - - if messages is not None: - openai_kwargs["messages"].extend(messages) await SecretsManager.ensure_secrets_in_workflow() - - provider_settings = SecretsManager.get_provider_settings_from_workflow( - config.prompt.llm_config.model - ) - - if not provider_settings: - model = getattr( - getattr(getattr(config, "prompt", None), "llm_config", None), "model", None - ) - raise InvalidSecretsV0Error(expected="dict", got=provider_settings, model=model) - - provider_settings = _apply_responses_bridge_if_needed( - formatted_prompt, provider_settings - ) - - with mockllm.user_aws_credentials_from(provider_settings): - response = await mockllm.acompletion( - **{ - k: v for k, v in openai_kwargs.items() if k != "model" - }, # we should use the model_name from provider_settings - **provider_settings, - ) + response = await _run_prompt_with_fallback(formatted_prompt, messages=messages) return response.choices[0].message.model_dump(exclude_none=True) # type: ignore @@ -3167,6 +3305,7 @@ async def _call_llm_with_fallback( "frequency_penalty", "presence_penalty", "reasoning_effort", + "chat_template_kwargs", ): val = llm_config.get(field) if val is not None: diff --git a/sdk/agenta/sdk/engines/running/interfaces.py b/sdk/agenta/sdk/engines/running/interfaces.py index 3d209badf9..30facbecfe 100644 --- 
a/sdk/agenta/sdk/engines/running/interfaces.py +++ b/sdk/agenta/sdk/engines/running/interfaces.py @@ -345,6 +345,7 @@ def llm_inputs_schema( ), x_ag_type="choice", ), + "chat_template_kwargs": obj(additional_properties=True), "tool_choice": { "oneOf": [ scalar(jtype="string", enum=["none", "auto"]), diff --git a/sdk/agenta/sdk/utils/types.py b/sdk/agenta/sdk/utils/types.py index 45534ac13a..562a0dbce2 100644 --- a/sdk/agenta/sdk/utils/types.py +++ b/sdk/agenta/sdk/utils/types.py @@ -1,6 +1,7 @@ import json from copy import deepcopy from dataclasses import dataclass +from enum import Enum from typing import Annotated, ClassVar, List, Union, Optional, Dict, Literal, Any from pydantic import ConfigDict, BaseModel, HttpUrl, RootModel @@ -446,6 +447,10 @@ class ModelConfig(BaseModel): "enum": ["none", "low", "medium", "high"], }, ) + chat_template_kwargs: Optional[Dict[str, Any]] = Field( + default=None, + description="Provider-specific chat template options passed through unchanged.", + ) response_format: Optional[ResponseFormat] = Field( default=None, description="An object specifying the format that the model must output", @@ -646,6 +651,7 @@ class AgLLM(AgSchemaMixin): default=None, json_schema_extra={"x-ag-type": "choice"}, ) + chat_template_kwargs: Optional[Dict[str, Any]] = Field(default=None) tool_choice: Optional[Union[Literal["none", "auto"], Dict]] = Field( default=None, ) @@ -669,6 +675,29 @@ def __getitem__(self, item): return self.root[item] +class FallbackModelConfig(ModelConfig): + """LLM config used for fallback entries. 
Same shape, required model.""" + + model: str = Field( + ..., + description="Model identifier to use for execution.", + json_schema_extra={"x-ag-type-ref": "model"}, + ) + + +class RetryPolicy(BaseModel): + max_retries: int = Field(default=1, ge=0) + delay_ms: int = Field(default=0, ge=0) + + +class FallbackPolicy(str, Enum): + OFF = "off" + AVAILABILITY = "availability" + CAPACITY = "capacity" + ACCESS = "access" + ANY = "any" + + class AgLoop(AgSchemaMixin): __ag_type__ = "loop" @@ -743,9 +772,44 @@ class PromptTemplate(AgSchemaMixin): default_factory=ModelConfig, description="Configuration for the model parameters", ) + fallback_llm_configs: Optional[List[FallbackModelConfig]] = Field( + default=None, + description="Ordered fallback LLM configs. Runtime default is no fallback configs.", + ) + retry_policy: Optional[RetryPolicy] = Field( + default=None, + description="Retry policy applied to each attempted LLM config.", + ) + fallback_policy: Optional[FallbackPolicy] = Field( + default=None, + description="Controls which provider-call errors can move execution to the next fallback config.", + json_schema_extra={ + "x-ag-type": "choice", + "enum": ["off", "availability", "capacity", "access", "any"], + "x-ag-metadata": { + "off": { + "description": "disable fallbacks", + }, + "availability": { + "description": "fall back on provider-side issues (or 5xx)", + }, + "capacity": { + "description": "availability + fall back on rate/quota limits (or 429)", + }, + "access": { + "description": "capacity + fall back on auth errors (or 401/403)", + }, + "any": { + "description": "access + fall back on any provider-call error (or 4xx)", + }, + }, + }, + ) @model_validator(mode="before") def init_messages(cls, values): + if not isinstance(values, dict): + return values if "messages" not in values: messages = [] if "system_prompt" in values and values["system_prompt"]: @@ -754,6 +818,13 @@ def init_messages(cls, values): messages.append(Message(role="user", 
content=values["user_prompt"])) if messages: values["messages"] = messages + fallback_configs = values.get("fallback_llm_configs") + if isinstance(fallback_configs, list): + for index, fallback_config in enumerate(fallback_configs): + if isinstance(fallback_config, dict) and not fallback_config.get( + "model" + ): + raise ValueError(f"fallback_llm_configs[{index}].model is required") return values def _format_with_template(self, content: str, kwargs: Dict[str, Any]) -> str: @@ -885,61 +956,81 @@ def format(self, **kwargs) -> "PromptTemplate": ) ) - new_llm_config = self.llm_config.model_copy(deep=True) - if new_llm_config.response_format is not None: - rf_dict = new_llm_config.response_format.model_dump(by_alias=True) - substituted = self._substitute_variables(rf_dict, kwargs) - rf_type = type(new_llm_config.response_format) - new_llm_config.response_format = rf_type(**substituted) + new_llm_config = self._format_llm_config(self.llm_config, kwargs) + new_fallback_llm_configs = None + if self.fallback_llm_configs is not None: + new_fallback_llm_configs = [ + self._format_llm_config(fallback_config, kwargs) + for fallback_config in self.fallback_llm_configs + ] return PromptTemplate( messages=new_messages, template_format=self.template_format, llm_config=new_llm_config, + fallback_llm_configs=new_fallback_llm_configs, + retry_policy=self.retry_policy, + fallback_policy=self.fallback_policy, input_keys=self.input_keys, ) + def _format_llm_config( + self, llm_config: ModelConfig, kwargs: Dict[str, Any] + ) -> ModelConfig: + new_llm_config = llm_config.model_copy(deep=True) + if new_llm_config.response_format is not None: + rf_dict = new_llm_config.response_format.model_dump(by_alias=True) + substituted = self._substitute_variables(rf_dict, kwargs) + rf_type = type(new_llm_config.response_format) + new_llm_config.response_format = rf_type(**substituted) + return new_llm_config + def to_openai_kwargs(self) -> dict: """Convert the prompt template to kwargs compatible with 
litellm/openai""" + return self.to_openai_kwargs_for_llm_config(self.llm_config) + + def to_openai_kwargs_for_llm_config(self, llm_config: ModelConfig) -> dict: + """Convert the prompt template to kwargs for a specific LLM config.""" kwargs = { "messages": [msg.model_dump(exclude_none=True) for msg in self.messages], } # Add optional parameters only if they are set - if self.llm_config.model is not None: - kwargs["model"] = self.llm_config.model + if llm_config.model is not None: + kwargs["model"] = llm_config.model - if self.llm_config.temperature is not None: - kwargs["temperature"] = self.llm_config.temperature + if llm_config.temperature is not None: + kwargs["temperature"] = llm_config.temperature - if self.llm_config.top_p is not None: - kwargs["top_p"] = self.llm_config.top_p + if llm_config.top_p is not None: + kwargs["top_p"] = llm_config.top_p - if self.llm_config.stream is not None: - kwargs["stream"] = self.llm_config.stream + if llm_config.stream is not None: + kwargs["stream"] = llm_config.stream - if self.llm_config.max_tokens is not None: - kwargs["max_tokens"] = self.llm_config.max_tokens + if llm_config.max_tokens is not None: + kwargs["max_tokens"] = llm_config.max_tokens - if self.llm_config.frequency_penalty is not None: - kwargs["frequency_penalty"] = self.llm_config.frequency_penalty + if llm_config.frequency_penalty is not None: + kwargs["frequency_penalty"] = llm_config.frequency_penalty - if self.llm_config.presence_penalty is not None: - kwargs["presence_penalty"] = self.llm_config.presence_penalty + if llm_config.presence_penalty is not None: + kwargs["presence_penalty"] = llm_config.presence_penalty - if self.llm_config.reasoning_effort is not None: - kwargs["reasoning_effort"] = self.llm_config.reasoning_effort + if llm_config.reasoning_effort is not None: + kwargs["reasoning_effort"] = llm_config.reasoning_effort - if self.llm_config.response_format: - kwargs["response_format"] = self.llm_config.response_format.dict( - by_alias=True - 
) + if llm_config.chat_template_kwargs is not None: + kwargs["chat_template_kwargs"] = llm_config.chat_template_kwargs + + if llm_config.response_format: + kwargs["response_format"] = llm_config.response_format.dict(by_alias=True) - if self.llm_config.tools: - kwargs["tools"] = self.llm_config.tools + if llm_config.tools: + kwargs["tools"] = llm_config.tools # Only set tool_choice if tools are present - if self.llm_config.tool_choice is not None: - kwargs["tool_choice"] = self.llm_config.tool_choice + if llm_config.tool_choice is not None: + kwargs["tool_choice"] = llm_config.tool_choice return kwargs diff --git a/sdk/oss/tests/pytest/unit/test_prompt_template_extensions.py b/sdk/oss/tests/pytest/unit/test_prompt_template_extensions.py new file mode 100644 index 0000000000..6de0a5a8ef --- /dev/null +++ b/sdk/oss/tests/pytest/unit/test_prompt_template_extensions.py @@ -0,0 +1,155 @@ +import pytest + +from agenta.sdk.engines.running.handlers import ( + _normalize_fallback_policy, + _normalize_retry_policy, + _prompt_llm_configs, + _run_prompt_with_fallback, + _should_fallback, +) +from agenta.sdk.utils.types import ( + CATALOG_TYPES, + FallbackPolicy, + ModelConfig, + PromptTemplate, + RetryPolicy, +) + + +class ProviderError(Exception): + def __init__(self, status_code): + self.status_code = status_code + super().__init__(f"provider error {status_code}") + + +def test_new_prompt_template_fields_default_to_null_in_data_model(): + prompt = PromptTemplate() + + assert prompt.fallback_llm_configs is None + assert prompt.retry_policy is None + assert prompt.fallback_policy is None + + dumped = prompt.model_dump() + assert dumped["fallback_llm_configs"] is None + assert dumped["retry_policy"] is None + assert dumped["fallback_policy"] is None + + +def test_new_prompt_template_fields_normalize_at_runtime(): + prompt = PromptTemplate() + + assert _prompt_llm_configs(prompt) == [prompt.llm_config] + assert _normalize_retry_policy(prompt.retry_policy) == RetryPolicy( + 
max_retries=1, + delay_ms=0, + ) + assert _normalize_fallback_policy(prompt.fallback_policy) == FallbackPolicy.OFF + + +def test_chat_template_kwargs_is_passed_through_when_set(): + prompt = PromptTemplate( + llm_config=ModelConfig( + model="qwen/qwen3", + chat_template_kwargs={"enable_thinking": False}, + ) + ) + + assert prompt.to_openai_kwargs()["chat_template_kwargs"] == { + "enable_thinking": False + } + + +def test_chat_template_kwargs_is_not_template_formatted(): + prompt = PromptTemplate( + messages=[{"role": "user", "content": "Hello {{name}}"}], + input_keys=["name"], + llm_config=ModelConfig( + model="qwen/qwen3", + chat_template_kwargs={"literal": "{{provider_flag}}"}, + ), + fallback_llm_configs=[ + { + "model": "fallback", + "chat_template_kwargs": {"nested": {"literal": "{{fallback_flag}}"}}, + } + ], + ) + + formatted = prompt.format(name="Ada") + + assert formatted.messages[0].content == "Hello Ada" + assert formatted.llm_config.chat_template_kwargs == {"literal": "{{provider_flag}}"} + assert formatted.fallback_llm_configs[0].chat_template_kwargs == { + "nested": {"literal": "{{fallback_flag}}"} + } + + +def test_null_chat_template_kwargs_is_omitted_from_provider_kwargs(): + prompt = PromptTemplate(llm_config=ModelConfig(model="gpt-4o-mini")) + + assert "chat_template_kwargs" not in prompt.to_openai_kwargs() + + +def test_fallback_config_requires_model(): + with pytest.raises(ValueError, match="fallback_llm_configs\\[0\\]\\.model"): + PromptTemplate(fallback_llm_configs=[{"temperature": 0.2}]) + + +def test_prompt_template_catalog_schema_exposes_fallback_model_ref(): + schema = CATALOG_TYPES["prompt-template"] + fallback_schema = schema["properties"]["fallback_llm_configs"] + retry_schema = schema["properties"]["retry_policy"] + fallback_policy_schema = schema["properties"]["fallback_policy"] + array_schema = next( + option for option in fallback_schema["anyOf"] if option.get("type") == "array" + ) + retry_object_schema = next( + option for option 
in retry_schema["anyOf"] if option.get("type") == "object" + ) + + assert fallback_schema["default"] is None + assert array_schema["items"]["properties"]["model"]["x-ag-type-ref"] == "model" + assert "model" in array_schema["items"]["required"] + assert fallback_policy_schema["x-ag-type"] == "choice" + assert fallback_policy_schema["enum"] == [ + "off", + "availability", + "capacity", + "access", + "any", + ] + assert set(retry_object_schema["properties"]) == {"max_retries", "delay_ms"} + + +def test_fallback_policy_404_only_allowed_by_any(): + error = ProviderError(404) + + assert not _should_fallback(error, FallbackPolicy.ACCESS) + assert _should_fallback(error, FallbackPolicy.ANY) + + +@pytest.mark.asyncio +async def test_prompt_runner_moves_to_fallback_after_candidate_failure(monkeypatch): + calls = [] + + async def fake_run_candidate( + formatted_prompt, llm_config, retry_policy, messages=None + ): + calls.append(llm_config.model) + if llm_config.model == "primary": + raise ProviderError(503) + return "fallback response" + + monkeypatch.setattr( + "agenta.sdk.engines.running.handlers._run_prompt_llm_config_with_retry", + fake_run_candidate, + ) + + prompt = PromptTemplate( + llm_config=ModelConfig(model="primary"), + fallback_llm_configs=[{"model": "fallback"}], + fallback_policy=FallbackPolicy.AVAILABILITY, + ) + + assert await _run_prompt_with_fallback(prompt) == "fallback response" + assert calls == ["primary", "fallback"] diff --git a/web/oss/src/components/Playground/Components/Modals/RefinePromptModal/hooks/useRefinePrompt.ts b/web/oss/src/components/Playground/Components/Modals/RefinePromptModal/hooks/useRefinePrompt.ts index a4a75d7c2e..a20703c171 100644 --- a/web/oss/src/components/Playground/Components/Modals/RefinePromptModal/hooks/useRefinePrompt.ts +++ b/web/oss/src/components/Playground/Components/Modals/RefinePromptModal/hooks/useRefinePrompt.ts @@ -82,6 +82,7 @@ function extractPromptTemplate(promptValue: unknown): PromptTemplate | null { 
.filter(Boolean) as {role: string; content: string}[] return { + ...prompt, messages: extracted, template_format: typeof prompt.template_format === "string" ? prompt.template_format : "", } @@ -95,7 +96,7 @@ function extractPromptTemplate(promptValue: unknown): PromptTemplate | null { */ function parseRefineResponse( response: unknown, - originalTemplateFormat: string, + originalPrompt: PromptTemplate, ): { refinedPrompt: PromptTemplate | null explanation: string @@ -108,8 +109,9 @@ function parseRefineResponse( if (structured?.messages && Array.isArray(structured.messages)) { return { refinedPrompt: { + ...originalPrompt, messages: structured.messages as {role: string; content: string}[], - template_format: originalTemplateFormat, + template_format: originalPrompt.template_format || "", }, explanation, } @@ -184,10 +186,7 @@ export function useRefinePrompt({ throw new Error(errorText) } - const {refinedPrompt, explanation} = parseRefineResponse( - response, - promptToRefine.template_format || "", - ) + const {refinedPrompt, explanation} = parseRefineResponse(response, promptToRefine) if (!refinedPrompt) { throw new Error("No refined prompt in response") diff --git a/web/oss/src/components/Playground/Components/Modals/RefinePromptModal/types.ts b/web/oss/src/components/Playground/Components/Modals/RefinePromptModal/types.ts index 94fa1efb8f..9c70bef2b9 100644 --- a/web/oss/src/components/Playground/Components/Modals/RefinePromptModal/types.ts +++ b/web/oss/src/components/Playground/Components/Modals/RefinePromptModal/types.ts @@ -34,9 +34,13 @@ export interface PromptTemplate { role: string content: string }[] - template_format?: string - input_keys?: string[] - llm_config?: Record + template_format?: string | null + input_keys?: string[] | null + llm_config?: Record | null + fallback_llm_configs?: Record[] | null + retry_policy?: Record | null + fallback_policy?: string | null + [key: string]: unknown } /** diff --git 
a/web/packages/agenta-entities/src/shared/execution/requestBodyBuilder.ts b/web/packages/agenta-entities/src/shared/execution/requestBodyBuilder.ts index 9cdd80156e..f6ec43df4e 100644 --- a/web/packages/agenta-entities/src/shared/execution/requestBodyBuilder.ts +++ b/web/packages/agenta-entities/src/shared/execution/requestBodyBuilder.ts @@ -223,11 +223,16 @@ export function transformToRequestBody({ // Preserve input_keys and template_format from original parameters if they exist const originalPromptConfig = asRecord(originalParams[name]) if (Object.keys(originalPromptConfig).length > 0) { - if (originalPromptConfig["input_keys"] && !extracted["input_keys"]) { - extracted["input_keys"] = originalPromptConfig["input_keys"] - } - if (originalPromptConfig["template_format"] && !extracted["template_format"]) { - extracted["template_format"] = originalPromptConfig["template_format"] + for (const key of [ + "input_keys", + "template_format", + "fallback_llm_configs", + "retry_policy", + "fallback_policy", + ]) { + if (key in originalPromptConfig && !(key in extracted)) { + extracted[key] = originalPromptConfig[key] + } } } diff --git a/web/packages/agenta-entity-ui/src/DrillInView/components/PlaygroundConfigSection.tsx b/web/packages/agenta-entity-ui/src/DrillInView/components/PlaygroundConfigSection.tsx index fce39269ff..089e131a64 100644 --- a/web/packages/agenta-entity-ui/src/DrillInView/components/PlaygroundConfigSection.tsx +++ b/web/packages/agenta-entity-ui/src/DrillInView/components/PlaygroundConfigSection.tsx @@ -33,8 +33,8 @@ import {useDrillInUI} from "@agenta/ui/drill-in" import {formatLabel} from "@agenta/ui/drill-in" import {SelectLLMProviderBase} from "@agenta/ui/select-llm-provider" import {SharedEditor} from "@agenta/ui/shared-editor" -import {CaretDown, CaretRight, MagicWand} from "@phosphor-icons/react" -import {Button, Popover, Select, Tooltip, Typography} from "antd" +import {CaretDown, CaretRight, MagicWand, X} from "@phosphor-icons/react" +import 
{Button, InputNumber, Popover, Select, Tooltip, Typography} from "antd" import clsx from "clsx" import type {Atom, WritableAtom} from "jotai" import {atom} from "jotai" @@ -119,6 +119,15 @@ function hasParameters(data: {parameters?: Record} | null | und return Boolean(data?.parameters && Object.keys(data.parameters).length > 0) } +const FALLBACK_POLICY_OPTIONS = ["off", "availability", "capacity", "access", "any"].map( + (value) => ({label: value, value}), +) +const DEFAULT_RETRY_POLICY = { + max_retries: 1, + delay_ms: 0, +} +const PROMPT_EXTENSION_KEYS = ["fallback_llm_configs", "retry_policy", "fallback_policy"] + // ============================================================================ // AGENTA_METADATA HELPERS // ============================================================================ @@ -360,6 +369,137 @@ function buildDefaultAdapter(): ConfigSectionMoleculeAdapter { const defaultAdapter = buildDefaultAdapter() +// ============================================================================ +// LLM CONFIG EDITOR (shared by primary model + fallback configs) +// ============================================================================ + +interface LLMConfigEditorProps { + value: Record + onChange: (key: string, next: unknown) => void + // eslint-disable-next-line @typescript-eslint/no-explicit-any + llmConfigProps: Record + // eslint-disable-next-line @typescript-eslint/no-explicit-any + modelOptions: any[] + footerContent?: React.ReactNode + disabled?: boolean + title?: string + onReset?: () => void +} + +function LLMConfigEditor({ + value, + onChange, + llmConfigProps, + modelOptions, + footerContent, + disabled, + title = "Model Parameters", + onReset, +}: LLMConfigEditorProps) { + return ( +
+
+ {title} + {onReset && ( + + )} +
+ onChange("model", nextModel)} + size="small" + footerContent={footerContent} + disabled={disabled} + /> + {Object.entries(llmConfigProps).map(([key, propSchema]) => { + const resolved = resolveAnyOfSchema(propSchema) + const schemaType = resolved?.type + const enumValues = (resolved?.enum ?? propSchema?.enum) as string[] | undefined + + if (key === "chat_template_kwargs") { + const currentValue = value?.[key] + const editorValue = + currentValue == null ? "" : JSON.stringify(currentValue, null, 2) + return ( +
+ + {formatLabel(key)} + + { + const raw = nextEditorValue.trim() + if (!raw) { + onChange(key, null) + return + } + try { + onChange(key, JSON.parse(raw)) + } catch { + // Keep the last valid value. + } + }} + disabled={disabled} + className="min-h-[96px] overflow-hidden" + editorProps={{ + codeOnly: true, + language: "json", + showLineNumbers: false, + }} + syncWithInitialValueChanges + /> +
+ ) + } + + if (enumValues && enumValues.length > 0) { + return ( +
+ + {formatLabel(key)} + + + updatePromptRootField("fallback_policy", nextValue ?? null) + } + options={fallbackPolicyOptions} + placeholder="Select one" + disabled={disabled} + optionRender={(option) => { + const description = (option.data as {description?: string}).description + return ( +
+ {option.label} + {description && ( + + {description} + + )} +
+ ) + }} + /> +
+
+ Models + {fallbackConfigs.map((config, index) => ( +
+ + handleFallbackConfigChange(index, key, next) + } + llmConfigProps={promptModelInfo?.llmConfigProps ?? {}} + modelOptions={fallbackModelOptions} + footerContent={llmProviderConfig?.footerContent} + disabled={disabled} + onReset={() => handleResetFallbackConfig(index)} + /> + } + > + + +
+ ))} + +
+
+ + ) + // ========== FIELD ACTIONS SLOT ========== const fieldActionsSlot = useCallback((props: FieldActionsSlotProps) => { if (props.path.length === 1) return null @@ -784,6 +1242,42 @@ function PlaygroundConfigSection({ /> )} + {hasPromptExtensionFields && ( + <> + + + + + + + + )} -
- - Model Parameters - - -
- - {Object.entries(promptModelInfo.llmConfigProps).map( - ([key, propSchema]) => { - const resolved = resolveAnyOfSchema(propSchema) - const schemaType = resolved?.type - const enumValues = (resolved?.enum ?? - propSchema?.enum) as string[] | undefined - - if (enumValues && enumValues.length > 0) { - return ( -
- - {formatLabel(key)} - -