Skip to content

Commit 60c8120

Browse files
Support token exchange for bedrock
This change requires the following: 1. Base URL support (handled in reqllm but not in Bifrost). 2. Custom tuning of stream enablement. 3. Potentially more.
1 parent 57eff70 commit 60c8120

36 files changed

Lines changed: 864 additions & 122 deletions

assets/src/generated/graphql.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1504,16 +1504,22 @@ export type BedrockAiAttributes = {
15041504
awsAccessKeyId?: InputMaybe<Scalars['String']['input']>;
15051505
/** the aws secret access key to use (DEPRECATED) */
15061506
awsSecretAccessKey?: InputMaybe<Scalars['String']['input']>;
1507+
/** the base url to use when querying a Bedrock-compatible API */
1508+
baseUrl?: InputMaybe<Scalars['String']['input']>;
15071509
/** Deprecated for most configurations: prefer regional-prefixed inference profile IDs in modelId or proxyModels (aliases are inferred automatically). Still needed for explicit client model name overrides, application inference profile resource IDs (profile suffix only, not full ARN), or when alias mapping cannot be inferred. Maps client-facing model ID to inference profile ID. Example: {"anthropic.claude-3-5-sonnet-20241022-v2:0": "us.anthropic.claude-3-5-sonnet-20241022-v2:0"} */
15081510
deployments?: InputMaybe<Scalars['Json']['input']>;
15091511
/** Bedrock model or inference profile for embeddings. Same ID formats as modelId. */
15101512
embeddingModel?: InputMaybe<Scalars['String']['input']>;
1513+
/** whether to enable streaming responses */
1514+
enableStream?: InputMaybe<Scalars['Boolean']['input']>;
15111515
/** AWS Bedrock model or inference profile identifier. Use a foundation model ID (e.g. anthropic.claude-3-5-sonnet-20241022-v2:0) or a regional inference profile ID with three dot-separated segments (e.g. us.anthropic.claude-3-5-sonnet-20241022-v2:0, global.anthropic.claude-haiku-4-5-20251001-v1:0). Nexus registers the bare model ID for routing and auto-maps 3-part profile IDs to Bifrost aliases. */
15121516
modelId?: InputMaybe<Scalars['String']['input']>;
15131517
/** Additional Bedrock model or inference profile IDs exposed through the Nexus OpenAI-compatible proxy beyond modelId, toolModelId, and embeddingModel. Same ID formats as modelId. */
15141518
proxyModels?: InputMaybe<Array<InputMaybe<Scalars['String']['input']>>>;
15151519
/** the aws region the model is hosted in */
15161520
region?: InputMaybe<Scalars['String']['input']>;
1521+
/** OAuth2 client credentials against a token endpoint to obtain access tokens */
1522+
tokenExchange?: InputMaybe<OpenaiTokenExchangeAttributes>;
15171523
/** Bedrock model or inference profile for tool calls. Same ID formats as modelId. */
15181524
toolModelId?: InputMaybe<Scalars['String']['input']>;
15191525
};
@@ -1523,16 +1529,22 @@ export type BedrockAiSettings = {
15231529
__typename?: 'BedrockAiSettings';
15241530
/** the bedrock aws access key id to use (DEPRECATED) */
15251531
accessKeyId?: Maybe<Scalars['String']['output']>;
1532+
/** the base url to use when querying a Bedrock-compatible API */
1533+
baseUrl?: Maybe<Scalars['String']['output']>;
15261534
/** Deprecated for most configurations: prefer regional-prefixed inference profile IDs in modelId or proxyModels (aliases are inferred automatically). Still needed for explicit client model name overrides, application inference profile resource IDs (profile suffix only, not full ARN), or when alias mapping cannot be inferred. Maps client-facing model ID to inference profile ID. Example: {"anthropic.claude-3-5-sonnet-20241022-v2:0": "us.anthropic.claude-3-5-sonnet-20241022-v2:0"} */
15271535
deployments?: Maybe<Scalars['Map']['output']>;
15281536
/** Bedrock model or inference profile for embeddings. Same ID formats as modelId. */
15291537
embeddingModel?: Maybe<Scalars['String']['output']>;
1538+
/** whether streaming responses are enabled */
1539+
enableStream?: Maybe<Scalars['Boolean']['output']>;
15301540
/** AWS Bedrock model or inference profile identifier. Use a foundation model ID (e.g. anthropic.claude-3-5-sonnet-20241022-v2:0) or a regional inference profile ID with three dot-separated segments (e.g. us.anthropic.claude-3-5-sonnet-20241022-v2:0, global.anthropic.claude-haiku-4-5-20251001-v1:0). Nexus registers the bare model ID for routing and auto-maps 3-part profile IDs to Bifrost aliases. Omit for Plural defaults. */
15311541
modelId?: Maybe<Scalars['String']['output']>;
15321542
/** Additional Bedrock model or inference profile IDs exposed through the Nexus OpenAI-compatible proxy beyond modelId, toolModelId, and embeddingModel. Same ID formats as modelId. */
15331543
proxyModels?: Maybe<Array<Maybe<Scalars['String']['output']>>>;
15341544
/** the aws region the model is hosted in */
15351545
region?: Maybe<Scalars['String']['output']>;
1546+
/** OAuth2 client credentials configured for token endpoint exchange */
1547+
tokenExchange?: Maybe<OpenaiTokenExchange>;
15361548
/** Bedrock model or inference profile for tool calls. Same ID formats as modelId. */
15371549
toolModelId?: Maybe<Scalars['String']['output']>;
15381550
};

go/client/models_gen.go

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

go/controller/api/v1alpha1/deploymentsettings_types.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -749,13 +749,22 @@ func (in *AISettings) Attributes(ctx context.Context, c client.Client, namespace
749749
attr.Bedrock = &console.BedrockAiAttributes{
750750
ModelID: in.Bedrock.ModelID,
751751
ToolModelID: in.Bedrock.ToolModelId,
752+
BaseURL: in.Bedrock.BaseUrl,
752753
AccessToken: secret,
753754
Region: lo.ToPtr(in.Bedrock.Region),
754755
EmbeddingModel: in.Bedrock.EmbeddingModel,
755756
ProxyModels: lo.ToSlicePtr(in.Bedrock.ProxyModels),
756757
AWSSecretAccessKey: secretKey,
757758
AWSAccessKeyID: in.Bedrock.AwsAccessKeyID,
758759
Deployments: deployments,
760+
EnableStream: in.Bedrock.EnableStream,
761+
}
762+
if in.Bedrock.TokenExchange != nil {
763+
tokenExchange, err := in.Bedrock.TokenExchange.Attributes(ctx, c, namespace)
764+
if err != nil {
765+
return nil, err
766+
}
767+
attr.Bedrock.TokenExchange = tokenExchange
759768
}
760769
}
761770

@@ -1047,6 +1056,22 @@ type BedrockSettings struct {
10471056
// +kubebuilder:validation:Optional
10481057
ToolModelId *string `json:"toolModelId,omitempty"`
10491058

1059+
// BaseUrl is the base url to use when querying a Bedrock-compatible API.
1060+
//
1061+
// +kubebuilder:validation:Optional
1062+
BaseUrl *string `json:"baseUrl,omitempty"`
1063+
1064+
// EnableStream controls whether streaming responses are enabled. Set to false explicitly to disable, otherwise on.
1065+
//
1066+
// +kubebuilder:default=true
1067+
// +kubebuilder:validation:Optional
1068+
EnableStream *bool `json:"enableStream,omitempty"`
1069+
1070+
// TokenExchange configures OAuth2 client credentials against a token endpoint to obtain access tokens.
1071+
//
1072+
// +kubebuilder:validation:Optional
1073+
TokenExchange *OAuth2TokenExchange `json:"tokenExchange,omitempty"`
1074+
10501075
// EmbeddingModel is the Bedrock model or inference profile for embeddings. Same ID formats as modelId.
10511076
//
10521077
// +kubebuilder:validation:Optional

go/controller/api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 15 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

go/controller/config/crd/bases/deployments.plural.sh_deploymentsettings.yaml

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,10 @@ spec:
235235
- key
236236
type: object
237237
x-kubernetes-map-type: atomic
238+
baseUrl:
239+
description: BaseUrl is the base url to use when querying
240+
a Bedrock-compatible API.
241+
type: string
238242
deployments:
239243
additionalProperties:
240244
type: string
@@ -250,6 +254,11 @@ spec:
250254
description: EmbeddingModel is the Bedrock model or inference
251255
profile for embeddings. Same ID formats as modelId.
252256
type: string
257+
enableStream:
258+
default: true
259+
description: EnableStream controls whether streaming responses
260+
are enabled.
261+
type: boolean
253262
modelId:
254263
description: |-
255264
ModelID is the primary AWS Bedrock model or inference profile identifier.
@@ -267,6 +276,46 @@ spec:
267276
description: Region is the AWS region the model is hosted
268277
in
269278
type: string
279+
tokenExchange:
280+
description: TokenExchange configures OAuth2 client credentials
281+
against a token endpoint to obtain access tokens.
282+
properties:
283+
clientId:
284+
description: ClientID is the OAuth2 client identifier.
285+
type: string
286+
clientSecretSecretRef:
287+
description: ClientSecretSecretRef is a reference to a
288+
Kubernetes secret key holding the OAuth2 client secret.
289+
properties:
290+
key:
291+
description: The key of the secret to select from. Must
292+
be a valid secret key.
293+
type: string
294+
name:
295+
default: ""
296+
description: |-
297+
Name of the referent.
298+
This field is effectively required, but due to backwards compatibility is
299+
allowed to be empty. Instances of this type with an empty value here are
300+
almost certainly wrong.
301+
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
302+
type: string
303+
optional:
304+
description: Specify whether the Secret or its key
305+
must be defined
306+
type: boolean
307+
required:
308+
- key
309+
type: object
310+
x-kubernetes-map-type: atomic
311+
enabled:
312+
description: Enabled turns token exchange on for obtaining
313+
access tokens via the configured token endpoint.
314+
type: boolean
315+
tokenUrl:
316+
description: TokenURL is the OAuth2 token endpoint URL.
317+
type: string
318+
type: object
270319
tokenSecretRef:
271320
description: |-
272321
TokenSecretRef is a reference to the local secret holding the token to access

go/deployment-operator/api/v1alpha1/agentruntime_types.go

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,13 @@ type CodexConfigRaw struct {
315315
// Timeout bounds a single codex run invocation.
316316
// +kubebuilder:validation:Optional
317317
Timeout *metav1.Duration `json:"timeout,omitempty"`
318+
319+
// DisableStream disables model provider API streaming when set to true explicitly.
320+
// When true with aiProxy, the harness sets X-Plural-Enable-Stream: false on the
321+
// plural model provider in config.toml. Codex has no direct env var; non-proxy
322+
// mode may not apply.
323+
// +kubebuilder:validation:Optional
324+
DisableStream *bool `json:"disableStream,omitempty"`
318325
}
319326

320327
type CodexConfig struct {
@@ -333,6 +340,10 @@ type CodexConfig struct {
333340
// Timeout bounds a single codex run invocation.
334341
// +kubebuilder:validation:Optional
335342
Timeout *metav1.Duration `json:"timeout,omitempty"`
343+
344+
// DisableStream disables model provider API streaming when set to true explicitly.
345+
// +kubebuilder:validation:Optional
346+
DisableStream *bool `json:"disableStream,omitempty"`
336347
}
337348

338349
func (in *CodexConfig) ToCodexConfigRaw(secretGetter func(corev1.SecretKeySelector) (*corev1.Secret, error)) (*CodexConfigRaw, error) {
@@ -341,9 +352,10 @@ func (in *CodexConfig) ToCodexConfigRaw(secretGetter func(corev1.SecretKeySelect
341352
}
342353

343354
result := &CodexConfigRaw{
344-
Model: in.Model,
345-
Endpoint: in.Endpoint,
346-
Timeout: in.Timeout,
355+
Model: in.Model,
356+
Endpoint: in.Endpoint,
357+
Timeout: in.Timeout,
358+
DisableStream: in.DisableStream,
347359
}
348360

349361
if !secretKeySelectorSet(in.ApiKeySecretRef) {
@@ -393,6 +405,12 @@ type ClaudeConfig struct {
393405
// for a command before it is terminated.
394406
// +kubebuilder:validation:Optional
395407
BashMaxTimeout *metav1.Duration `json:"bashMaxTimeout,omitempty"`
408+
409+
// DisableStream disables model provider API streaming when set to true explicitly.
410+
// The harness sets CLAUDE_CODE_EXTRA_BODY to {"stream":false} in settings.local.json.
411+
// With aiProxy, ANTHROPIC_CUSTOM_HEADERS also sends X-Plural-Enable-Stream: false.
412+
// +kubebuilder:validation:Optional
413+
DisableStream *bool `json:"disableStream,omitempty"`
396414
}
397415

398416
// ClaudeConfigRaw contains configuration for the Claude CLI runtime.
@@ -426,6 +444,10 @@ type ClaudeConfigRaw struct {
426444
// for a command before it is terminated.
427445
// +kubebuilder:validation:Optional
428446
BashMaxTimeout *metav1.Duration `json:"bashMaxTimeout,omitempty"`
447+
448+
// DisableStream disables model provider API streaming when set to true explicitly.
449+
// +kubebuilder:validation:Optional
450+
DisableStream *bool `json:"disableStream,omitempty"`
429451
}
430452

431453
func (in *ClaudeConfig) ToClaudeConfigRaw(secretGetter func(corev1.SecretKeySelector) (*corev1.Secret, error)) (*ClaudeConfigRaw, error) {
@@ -440,6 +462,7 @@ func (in *ClaudeConfig) ToClaudeConfigRaw(secretGetter func(corev1.SecretKeySele
440462
Timeout: in.Timeout,
441463
BashTimeout: in.BashTimeout,
442464
BashMaxTimeout: in.BashMaxTimeout,
465+
DisableStream: in.DisableStream,
443466
}
444467

445468
if !secretKeySelectorSet(in.ApiKeySecretRef) {

go/deployment-operator/internal/controller/agentrun_controller.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ const (
5050
EnvClaudeBashDefaultTimeout = "PLRL_CLAUDE_BASH_DEFAULT_TIMEOUT"
5151
EnvClaudeBashMaxTimeout = "PLRL_CLAUDE_BASH_MAX_TIMEOUT"
5252
EnvClaudeEndpoint = "PLRL_CLAUDE_ENDPOINT"
53+
EnvClaudeDisableStream = "PLRL_CLAUDE_DISABLE_STREAM"
5354

5455
EnvGeminiModel = "PLRL_GEMINI_MODEL"
5556
EnvGeminiAPIKey = "PLRL_GEMINI_API_KEY"
@@ -58,7 +59,8 @@ const (
5859

5960
EnvCodexModel = "PLRL_CODEX_MODEL"
6061
EnvCodexAPIKey = "PLRL_CODEX_API_KEY"
61-
EnvCodexEndpoint = "PLRL_CODEX_ENDPOINT"
62+
EnvCodexEndpoint = "PLRL_CODEX_ENDPOINT"
63+
EnvCodexDisableStream = "PLRL_CODEX_DISABLE_STREAM"
6264

6365
EnvDindEnabled = "PLRL_DIND_ENABLED"
6466
EnvBrowserEnabled = "PLRL_BROWSER_ENABLED"
@@ -523,6 +525,9 @@ func (r *AgentRunReconciler) getSecretData(run *v1alpha1.AgentRun, config *v1alp
523525
if config.Claude.Endpoint != nil {
524526
result[EnvClaudeEndpoint] = lo.FromPtr(config.Claude.Endpoint)
525527
}
528+
if config.Claude.DisableStream != nil && *config.Claude.DisableStream {
529+
result[EnvClaudeDisableStream] = "true"
530+
}
526531
}
527532

528533
if runtimeType == console.AgentRuntimeTypeGemini {
@@ -555,6 +560,9 @@ func (r *AgentRunReconciler) getSecretData(run *v1alpha1.AgentRun, config *v1alp
555560
if config.Codex.Endpoint != nil {
556561
result[EnvCodexEndpoint] = lo.FromPtr(config.Codex.Endpoint)
557562
}
563+
if config.Codex.DisableStream != nil && *config.Codex.DisableStream {
564+
result[EnvCodexDisableStream] = "true"
565+
}
558566
}
559567

560568
return result

0 commit comments

Comments
 (0)