Merge pull request #1286 from roboflow/fixing-cla-last-last-last

PawelPeczek-Roboflow · web-flow · commit 51635b9af2fb · 2025-05-15T12:45:04.000+02:00
Fixing CLA for Gemini models changes
diff --git a/inference/core/workflows/core_steps/models/foundation/google_gemini/v1.py b/inference/core/workflows/core_steps/models/foundation/google_gemini/v1.py
@@ -173,9 +173,16 @@ class BlockManifest(WorkflowBlockManifest):
     )
     model_version: Union[
         Selector(kind=[STRING_KIND]),
-        Literal["gemini-2.0-flash-exp", "gemini-1.5-flash", "gemini-1.5-pro"],
+        Literal[
+            "gemini-2.0-flash-exp",
+            "gemini-1.5-flash",
+            "gemini-1.5-pro",
+            "gemini-2.0-flash",
+            "gemini-2.5-pro-preview-05-06",
+            "gemini-2.0-flash-lite",
+        ],
     ] = Field(
-        default="gemini-1.5-flash",
+        default="gemini-2.0-flash",
         description="Model to be used",
         examples=["gemini-2.0-flash-exp", "$inputs.gemini_model"],
     )
diff --git a/tests/workflows/integration_tests/execution/test_workflow_with_gemini_models.py b/tests/workflows/integration_tests/execution/test_workflow_with_gemini_models.py
@@ -61,7 +61,6 @@ def test_workflow_with_unconstrained_prompt(
     dogs_image: np.ndarray,
     license_plate_image: np.ndarray,
 ) -> None:
-    # given
     workflow_init_parameters = {
         "workflows_core.model_manager": model_manager,
         "workflows_core.step_execution_mode": StepExecutionMode.LOCAL,
@@ -71,8 +70,6 @@ def test_workflow_with_unconstrained_prompt(
         init_parameters=workflow_init_parameters,
         max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS,
     )
-
-    # when
     result = execution_engine.run(
         runtime_parameters={
             "image": [dogs_image, license_plate_image],
@@ -81,7 +78,6 @@ def test_workflow_with_unconstrained_prompt(
         }
     )
 
-    # then
     assert len(result) == 2, "Single image given, expected single output"
     assert set(result[0].keys()) == {"result"}, "Expected all outputs to be delivered"
     assert set(result[1].keys()) == {"result"}, "Expected all outputs to be delivered"
@@ -135,7 +131,6 @@ def test_workflow_with_ocr_prompt(
     model_manager: ModelManager,
     license_plate_image: np.ndarray,
 ) -> None:
-    # given
     workflow_init_parameters = {
         "workflows_core.model_manager": model_manager,
         "workflows_core.step_execution_mode": StepExecutionMode.LOCAL,
@@ -146,15 +141,13 @@ def test_workflow_with_ocr_prompt(
         max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS,
     )
 
-    # when
     result = execution_engine.run(
         runtime_parameters={
             "image": [license_plate_image],
             "api_key": GOOGLE_API_KEY,
         }
     )
 
-    # then
     assert len(result) == 1, "Single image given, expected single output"
     assert set(result[0].keys()) == {"result"}, "Expected all outputs to be delivered"
     assert (
@@ -914,7 +907,85 @@ def test_workflow_with_object_detection_prompt(
     ],
 }
 
+CAPTION_WITH_VERSION_WORKFLOW = {
+    "version": "1.0",
+    "inputs": [
+        {"type": "WorkflowImage", "name": "image"},
+        {"type": "WorkflowParameter", "name": "api_key"},
+        {"type": "WorkflowParameter", "name": "model_version"},
+    ],
+    "steps": [
+        {
+            "type": "roboflow_core/google_gemini@v1",
+            "name": "gemini",
+            "images": "$inputs.image",
+            "task_type": "caption",
+            "api_key": "$inputs.api_key",
+            "model_version": "$inputs.model_version",
+        },
+    ],
+    "outputs": [
+        {
+            "type": "JsonField",
+            "name": "result",
+            "selector": "$steps.gemini.output",
+        },
+    ],
+}
 
+@add_to_workflows_gallery(
+    category="Workflows with Visual Language Models",
+    use_case_title="Using different versions of Google's Gemini for Image Captioning",
+    use_case_description="""
+    In this example, we test different Gemini model versions for image captioning.
+    This workflow allows specifying any supported Gemini model version as input parameter.
+    """,
+    workflow_definition=CAPTION_WITH_VERSION_WORKFLOW,
+    workflow_name_in_app="gemini-version-captioning",
+)
+@pytest.mark.skipif(
+    condition=GOOGLE_API_KEY is None, reason="Google API key not provided"
+)
+def test_workflow_with_different_gemini_versions(
+    model_manager: ModelManager,
+    license_plate_image: np.ndarray,
+) -> None:
+    # Exercise every model version listed in the block manifest's Literal options
+    model_versions = [
+        "gemini-1.5-flash",
+        "gemini-1.5-pro",
+        "gemini-2.0-flash",
+        "gemini-2.0-flash-exp",
+        "gemini-2.5-pro-preview-05-06",
+        "gemini-2.0-flash-lite"
+    ]
+    
+    # given
+    workflow_init_parameters = {
+        "workflows_core.model_manager": model_manager,
+        "workflows_core.step_execution_mode": StepExecutionMode.LOCAL,
+    }
+    execution_engine = ExecutionEngine.init(
+        workflow_definition=CAPTION_WITH_VERSION_WORKFLOW,
+        init_parameters=workflow_init_parameters,
+        max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS,
+    )
+    
+    for version in model_versions:
+        result = execution_engine.run(
+            runtime_parameters={
+                "image": [license_plate_image],
+                "api_key": GOOGLE_API_KEY,
+                "model_version": version,
+            }
+        )
+        
+        assert len(result) == 1, f"Single image given, expected single output for version {version}"
+        assert set(result[0].keys()) == {"result"}, f"Expected output key 'result' for version {version}"
+        assert (
+            isinstance(result[0]["result"], str) and len(result[0]["result"]) > 0
+        ), f"Expected non-empty string generated for version {version}"
+        
 @add_to_workflows_gallery(
     category="Workflows with Visual Language Models",
     use_case_title="Using Google's Gemini as secondary classifier",
@@ -963,3 +1034,4 @@ def test_workflow_with_secondary_classifier(
     assert "dog" not in set(
         result[0]["predictions"].data["class_name"].tolist()
     ), "Expected classes to be substituted"
+