Skip to content

Commit ae3c030

Browse files
daymxn and andrewheard authored
fix(ai): migrate gemma 3 models to gemma 4 (#16093)
Co-authored-by: Andrew Heard <andrewheard@google.com>
1 parent dbe3b1b commit ae3c030

2 files changed

Lines changed: 49 additions & 24 deletions

File tree

FirebaseAI/Tests/TestApp/Sources/Constants.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,5 +28,5 @@ public enum ModelNames {
2828
public static let gemini2_5_FlashLivePreview = "gemini-2.5-flash-native-audio-preview-12-2025"
2929
public static let gemini2_5_Pro = "gemini-2.5-pro"
3030
public static let gemini3_1_FlashLitePreview = "gemini-3.1-flash-lite-preview"
31-
public static let gemma3_4B = "gemma-3-4b-it"
31+
public static let gemma4_31B = "gemma-4-31b-it"
3232
}

FirebaseAI/Tests/TestApp/Tests/Integration/GenerateContentIntegrationTests.swift

Lines changed: 48 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -53,14 +53,14 @@ struct GenerateContentIntegrationTests {
5353
(InstanceConfig.vertexAI_v1beta_global_appCheckLimitedUse, ModelNames.gemini2_5_FlashLite),
5454
(InstanceConfig.googleAI_v1beta, ModelNames.gemini3_1_FlashLitePreview),
5555
(InstanceConfig.googleAI_v1beta_appCheckLimitedUse, ModelNames.gemini3_1_FlashLitePreview),
56-
(InstanceConfig.googleAI_v1beta, ModelNames.gemma3_4B),
57-
(InstanceConfig.googleAI_v1beta_freeTier, ModelNames.gemma3_4B),
56+
(InstanceConfig.googleAI_v1beta, ModelNames.gemma4_31B),
57+
(InstanceConfig.googleAI_v1beta_freeTier, ModelNames.gemma4_31B),
5858
// Note: The following configs are commented out for easy one-off manual testing.
5959
// (InstanceConfig.googleAI_v1beta_freeTier, ModelNames.gemini2_5_FlashLite),
6060
// (InstanceConfig.googleAI_v1beta_staging, ModelNames.gemini2_5_FlashLite),
61-
// (InstanceConfig.googleAI_v1beta_staging, ModelNames.gemma3_4B),
61+
// (InstanceConfig.googleAI_v1beta_staging, ModelNames.gemma4_31B),
6262
// (InstanceConfig.googleAI_v1beta_freeTier_bypassProxy, ModelNames.gemini2_5_FlashLite),
63-
// (InstanceConfig.googleAI_v1beta_freeTier_bypassProxy, ModelNames.gemma3_4B),
63+
// (InstanceConfig.googleAI_v1beta_freeTier_bypassProxy, ModelNames.gemma4_31B),
6464
])
6565
func generateContent(_ config: InstanceConfig, modelName: String) async throws {
6666
let model = FirebaseAI.componentInstance(config).generativeModel(
@@ -82,15 +82,13 @@ struct GenerateContentIntegrationTests {
8282
#expect(promptTokensDetails.modality == .text)
8383
#expect(promptTokensDetails.tokenCount == usageMetadata.promptTokenCount)
8484
// No thoughts in Flash Lite.
85-
#expect(usageMetadata.thoughtsTokenCount == 0)
86-
// The fields `candidatesTokenCount` and `candidatesTokensDetails` are not included when using
87-
// Gemma models.
88-
if modelName.hasPrefix("gemini-3") {
85+
if !modelName.contains("flash-lite") {
86+
#expect(usageMetadata.thoughtsTokenCount > 0)
87+
}
88+
// The `candidatesTokensDetails` field is not included when using Gemini 3 or Gemma models.
89+
if modelName.hasPrefix("gemini-3") || modelName.hasPrefix("gemma") {
8990
#expect(usageMetadata.candidatesTokenCount == 2)
9091
#expect(usageMetadata.candidatesTokensDetails.isEmpty)
91-
} else if modelName.hasPrefix("gemma") {
92-
#expect(usageMetadata.candidatesTokenCount == 0)
93-
#expect(usageMetadata.candidatesTokensDetails.isEmpty)
9492
} else {
9593
#expect(usageMetadata.candidatesTokenCount.isEqual(to: 3, accuracy: tokenCountAccuracy))
9694
#expect(usageMetadata.candidatesTokensDetails.count == 1)
@@ -528,15 +526,15 @@ struct GenerateContentIntegrationTests {
528526
),
529527
(InstanceConfig.googleAI_v1beta, ModelNames.gemini2_5_FlashLite),
530528
(InstanceConfig.googleAI_v1beta_appCheckLimitedUse, ModelNames.gemini2_5_FlashLite),
531-
(InstanceConfig.googleAI_v1beta, ModelNames.gemma3_4B),
529+
(InstanceConfig.googleAI_v1beta, ModelNames.gemma4_31B),
532530
// Note: The following configs are commented out for easy one-off manual testing.
533531
// (InstanceConfig.vertexAI_v1beta_staging, ModelNames.gemini2_5_FlashLite),
534532
// (InstanceConfig.googleAI_v1beta_staging, ModelNames.gemini2_5_FlashLite),
535-
// (InstanceConfig.googleAI_v1beta_staging, ModelNames.gemma3_4B),
533+
// (InstanceConfig.googleAI_v1beta_staging, ModelNames.gemma4_31B),
536534
// (InstanceConfig.googleAI_v1beta_freeTier_bypassProxy, ModelNames.gemini2_5_FlashLite),
537-
// (InstanceConfig.googleAI_v1beta_freeTier_bypassProxy, ModelNames.gemma3_4B),
535+
// (InstanceConfig.googleAI_v1beta_freeTier_bypassProxy, ModelNames.gemma4_31B),
538536
// (InstanceConfig.googleAI_v1beta_freeTier, ModelNames.gemini2_5_FlashLite),
539-
// (InstanceConfig.googleAI_v1beta_freeTier, ModelNames.gemma3_4B),
537+
// (InstanceConfig.googleAI_v1beta_freeTier, ModelNames.gemma4_31B),
540538
])
541539
func generateContentStream(_ config: InstanceConfig, modelName: String) async throws {
542540
let expectedResponse = [
@@ -566,27 +564,41 @@ struct GenerateContentIntegrationTests {
566564
textValues.append(text)
567565
} else if let finishReason = value.candidates.first?.finishReason {
568566
#expect(finishReason == .stop)
567+
} else if let thoughtSummary = value.thoughtSummary {
568+
#expect(!thoughtSummary.isEmpty)
569569
} else {
570570
Issue.record("Expected a candidate with a `TextPart` or a `finishReason`; got \(value).")
571571
}
572572
}
573573

574+
// Tests the text derived from streaming directly
575+
let modelJSONData = try #require(textValues.joined().data(using: .utf8))
576+
let response = try JSONDecoder().decode([String].self, from: modelJSONData)
577+
#expect(response == expectedResponse)
578+
574579
let userHistory = try #require(chat.history.first)
575580
#expect(userHistory.role == "user")
576581
#expect(userHistory.parts.count == 1)
577582
let promptTextPart = try #require(userHistory.parts.first as? TextPart)
578583
#expect(promptTextPart.text == prompt)
579584
let modelHistory = try #require(chat.history.last)
580585
#expect(modelHistory.role == "model")
581-
if modelName.hasPrefix("gemini-3.1-") {
582-
#expect(modelHistory.parts.count == 2)
583-
} else {
584-
#expect(modelHistory.parts.count == 1)
586+
let textParts = modelHistory.parts.compactMap { $0 as? TextPart }.filter {
587+
!$0.isThoughtOrRelated()
585588
}
586-
let modelTextPart = try #require(modelHistory.parts.first as? TextPart)
587-
let modelJSONData = try #require(modelTextPart.text.data(using: .utf8))
588-
let response = try JSONDecoder().decode([String].self, from: modelJSONData)
589-
#expect(response == expectedResponse)
589+
if textParts.count > 1 {
590+
Issue.record("Found multiple text parts: \(textParts)")
591+
}
592+
#expect(
593+
textParts.count == 1,
594+
"The model should reply with exactly one (non thought) text response."
595+
)
596+
597+
// Tests the text derived from the chat history
598+
let historyTextPart = try #require(textParts.first)
599+
let historyModelJSONData = try #require(historyTextPart.text.data(using: .utf8))
600+
let historyResponse = try JSONDecoder().decode([String].self, from: historyModelJSONData)
601+
#expect(historyResponse == expectedResponse)
590602
}
591603

592604
@Test(arguments: [
@@ -665,3 +677,16 @@ struct GenerateContentIntegrationTests {
665677
}
666678
}
667679
}
680+
681+
extension TextPart {
682+
/// Whether this text part is a thought or thought related text part.
683+
///
684+
/// In such cases, it can be ignored for display and testing purposes.
685+
///
686+
/// We use this over just a standard `isThought` check so that we can
687+
/// catch cases where the Gemini model sends a text part with empty text that just
688+
/// acts as the last thought of the model.
689+
func isThoughtOrRelated() -> Bool {
690+
return isThought || (thoughtSignature != nil && text.isEmpty)
691+
}
692+
}

0 commit comments

Comments (0)