diff --git a/FirebaseAI/CHANGELOG.md b/FirebaseAI/CHANGELOG.md index 5f39f39e9f8..8b65e8e7e48 100644 --- a/FirebaseAI/CHANGELOG.md +++ b/FirebaseAI/CHANGELOG.md @@ -2,6 +2,9 @@ - [feature] Added support for GoogleMaps grounding. - [fixed] Fixed a `no member 'autoFunctionDeclaration'` compilation error on unofficially supported Xcode versions older than 26.2. (#16037) +- [feature] Added support for proactivity configuration with the Live API via + `LiveProactivityConfig`. + # 12.12.0 - [feature] Added support for automatic function calling in diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift index 96a4082a599..148ffbf3bd7 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift @@ -55,13 +55,19 @@ struct BidiGenerateContentSetup: Encodable { /// turn. let outputAudioTranscription: BidiAudioTranscriptionConfig? + /// Optional. Configures the proactivity of the model. + /// + /// This allows the model to respond proactively to the input and to ignore irrelevant input. + let proactivity: ProactivityConfig? + init(model: String, generationConfig: BidiGenerationConfig? = nil, systemInstruction: ModelContent? = nil, tools: [Tool]? = nil, toolConfig: ToolConfig? = nil, inputAudioTranscription: BidiAudioTranscriptionConfig? = nil, - outputAudioTranscription: BidiAudioTranscriptionConfig? = nil) { + outputAudioTranscription: BidiAudioTranscriptionConfig? = nil, + proactivity: ProactivityConfig? 
= nil) { self.model = model self.generationConfig = generationConfig self.systemInstruction = systemInstruction @@ -69,6 +75,7 @@ struct BidiGenerateContentSetup: Encodable { self.toolConfig = toolConfig self.inputAudioTranscription = inputAudioTranscription self.outputAudioTranscription = outputAudioTranscription + self.proactivity = proactivity } } diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index bec393f3234..d22cd37e45a 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -152,7 +152,8 @@ actor LiveSessionService { tools: tools, toolConfig: toolConfig, inputAudioTranscription: generationConfig?.inputAudioTranscription, - outputAudioTranscription: generationConfig?.outputAudioTranscription + outputAudioTranscription: generationConfig?.outputAudioTranscription, + proactivity: generationConfig?.proactivityConfig ) let data = try jsonEncoder.encode(BidiGenerateContentClientMessage.setup(setup)) try await webSocket.send(.data(data)) diff --git a/FirebaseAI/Sources/Types/Internal/Live/ProactivityConfig.swift b/FirebaseAI/Sources/Types/Internal/Live/ProactivityConfig.swift new file mode 100644 index 00000000000..08e92686c75 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/ProactivityConfig.swift @@ -0,0 +1,22 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +/// Config for proactivity features. +struct ProactivityConfig: Encodable, Sendable { + let proactiveAudio: Bool? + + init(proactiveAudio: Bool?) { + self.proactiveAudio = proactiveAudio + } +} diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift b/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift index 1302d1be31f..27d740a49b2 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift @@ -20,6 +20,7 @@ public struct LiveGenerationConfig: Sendable { let bidiGenerationConfig: BidiGenerationConfig let inputAudioTranscription: BidiAudioTranscriptionConfig? let outputAudioTranscription: BidiAudioTranscriptionConfig? + let proactivityConfig: ProactivityConfig? /// Creates a new ``LiveGenerationConfig`` value. /// @@ -118,13 +119,17 @@ public struct LiveGenerationConfig: Sendable { /// /// > Important: Transcripts are independent to the model turn. This means transcripts may /// > come earlier or later than when the model sends the corresponding audio responses. + /// - proactivity: Controls the proactivity of the model. + /// + /// This allows the model to respond proactively to the input and to ignore irrelevant input. public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil, candidateCount: Int? = nil, maxOutputTokens: Int? = nil, presencePenalty: Float? = nil, frequencyPenalty: Float? = nil, responseModalities: [ResponseModality]? = nil, speech: SpeechConfig? = nil, inputAudioTranscription: AudioTranscriptionConfig? = nil, - outputAudioTranscription: AudioTranscriptionConfig? = nil) { + outputAudioTranscription: AudioTranscriptionConfig? = nil, + proactivity: LiveProactivityConfig? 
= nil) { self.init( BidiGenerationConfig( temperature: temperature, @@ -138,15 +143,18 @@ public struct LiveGenerationConfig: Sendable { speechConfig: speech?.speechConfig ), inputAudioTranscription: inputAudioTranscription?.audioTranscriptionConfig, - outputAudioTranscription: outputAudioTranscription?.audioTranscriptionConfig + outputAudioTranscription: outputAudioTranscription?.audioTranscriptionConfig, + proactivityConfig: proactivity?.proactivityConfig ) } init(_ bidiGenerationConfig: BidiGenerationConfig, inputAudioTranscription: BidiAudioTranscriptionConfig? = nil, - outputAudioTranscription: BidiAudioTranscriptionConfig? = nil) { + outputAudioTranscription: BidiAudioTranscriptionConfig? = nil, + proactivityConfig: ProactivityConfig? = nil) { self.bidiGenerationConfig = bidiGenerationConfig self.inputAudioTranscription = inputAudioTranscription self.outputAudioTranscription = outputAudioTranscription + self.proactivityConfig = proactivityConfig } } diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveProactivityConfig.swift b/FirebaseAI/Sources/Types/Public/Live/LiveProactivityConfig.swift new file mode 100644 index 00000000000..9d3035e7bf2 --- /dev/null +++ b/FirebaseAI/Sources/Types/Public/Live/LiveProactivityConfig.swift @@ -0,0 +1,37 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Configuration for controlling the proactivity of the model during conversation. 
+public struct LiveProactivityConfig: Sendable { + let proactivityConfig: ProactivityConfig + + init(_ proactivityConfig: ProactivityConfig) { + self.proactivityConfig = proactivityConfig + } + + /// Creates a new ``LiveProactivityConfig`` value. + /// + /// - Parameters: + /// - proactiveAudio: When enabled, the model can decline to respond to the last prompt. For + /// example, this allows + /// the model to ignore out-of-context speech, or to stay silent if the user hasn't made a + /// request yet. Defaults to `false`. + public init(proactiveAudio: Bool = false) { + self.init( + ProactivityConfig(proactiveAudio: proactiveAudio) + ) + } +}