You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard. Expand all lines: docs/openapi.json
+54 Lines changed: 54 additions & 0 deletions
Original file line number
Diff line number
Diff line change
@@ -11898,6 +11898,46 @@
11898
11898
"title": "ClientCredentialsOAuthFlow",
11899
11899
"description": "Defines configuration details for the OAuth 2.0 Client Credentials flow."
11900
11900
},
11901
+
"CompactionConfiguration": {
11902
+
"properties": {
11903
+
"enabled": {
11904
+
"type": "boolean",
11905
+
"title": "Enable compaction",
11906
+
"description": "When true, older conversation turns are summarized when estimated tokens approach the context window limit.",
11907
+
"default": false
11908
+
},
11909
+
"threshold_ratio": {
11910
+
"type": "number",
11911
+
"title": "Threshold ratio",
11912
+
"description": "Trigger compaction when estimated tokens exceed this fraction of the model's context window (0.0-1.0).",
11913
+
"default": 0.7
11914
+
},
11915
+
"token_floor": {
11916
+
"type": "integer",
11917
+
"minimum": 0.0,
11918
+
"title": "Token floor",
11919
+
"description": "Minimum token count before compaction can trigger. Prevents triggering on very small context windows.",
11920
+
"default": 4096
11921
+
},
11922
+
"buffer_turns": {
11923
+
"type": "integer",
11924
+
"minimum": 0.0,
11925
+
"title": "Buffer turns",
11926
+
"description": "Number of recent turns to keep verbatim.",
11927
+
"default": 4
11928
+
},
11929
+
"buffer_max_ratio": {
11930
+
"type": "number",
11931
+
"title": "Buffer max ratio",
11932
+
"description": "Maximum fraction of context window the buffer zone can occupy, regardless of buffer_turns.",
11933
+
"default": 0.3
11934
+
}
11935
+
},
11936
+
"additionalProperties": false,
11937
+
"type": "object",
11938
+
"title": "CompactionConfiguration",
11939
+
"description": "Configuration for conversation history compaction.\n\nCompaction summarizes older conversation turns when their estimated\ntoken count approaches the context window limit, keeping the\nconversation usable instead of failing with HTTP 413. The\nconfiguration here controls when compaction triggers and how much\nrecent context is preserved verbatim.\n\nAttributes:\n enabled: Master switch. When False, compaction never triggers\n and other fields are inert.\n threshold_ratio: Trigger compaction when estimated input tokens\n exceed this fraction of the model's context window\n (clamped to 0.0..1.0).\n token_floor: Minimum estimated token count before compaction\n can trigger, regardless of threshold_ratio. Prevents\n triggering on very small context windows.\n buffer_turns: Initial number of recent turns to keep verbatim.\n The runtime applies a degrading guard \u2014 if these turns\n exceed the available budget, it reduces buffer_turns by\n one repeatedly until the budget fits, down to zero.\n buffer_max_ratio: Hard cap on the fraction of the context\n window the buffer zone may occupy, regardless of\n buffer_turns."
"description": "Controls when conversation history is summarized to keep the model's input below the context window limit. Disabled by default \u2014 when disabled, requests that exceed the window continue to surface as HTTP 413."
12018
+
},
11974
12019
"byok_rag": {
11975
12020
"items": {
11976
12021
"$ref": "#/components/schemas/ByokRag"
@@ -13391,6 +13436,15 @@
13391
13436
],
13392
13437
"title": "Default provider",
13393
13438
"description": "Identification of default provider used when no other model is specified."
"description": "Map of fully-qualified model identifier (e.g., \"openai/gpt-4o-mini\") to context window size in tokens. Used by the conversation compaction trigger to decide when older turns must be summarized before the input exceeds the window. Models absent from this map have no registered window \u2014 callers fall back to their own default or skip the token-based trigger."
0 commit comments