@@ -61,7 +61,6 @@ def test_workflow_with_unconstrained_prompt(
6161 dogs_image : np .ndarray ,
6262 license_plate_image : np .ndarray ,
6363) -> None :
64- # given
6564 workflow_init_parameters = {
6665 "workflows_core.model_manager" : model_manager ,
6766 "workflows_core.step_execution_mode" : StepExecutionMode .LOCAL ,
@@ -71,8 +70,6 @@ def test_workflow_with_unconstrained_prompt(
7170 init_parameters = workflow_init_parameters ,
7271 max_concurrent_steps = WORKFLOWS_MAX_CONCURRENT_STEPS ,
7372 )
74-
75- # when
7673 result = execution_engine .run (
7774 runtime_parameters = {
7875 "image" : [dogs_image , license_plate_image ],
@@ -81,7 +78,6 @@ def test_workflow_with_unconstrained_prompt(
8178 }
8279 )
8380
84- # then
8581 assert len (result ) == 2 , "Single image given, expected single output"
8682 assert set (result [0 ].keys ()) == {"result" }, "Expected all outputs to be delivered"
8783 assert set (result [1 ].keys ()) == {"result" }, "Expected all outputs to be delivered"
@@ -135,7 +131,6 @@ def test_workflow_with_ocr_prompt(
135131 model_manager : ModelManager ,
136132 license_plate_image : np .ndarray ,
137133) -> None :
138- # given
139134 workflow_init_parameters = {
140135 "workflows_core.model_manager" : model_manager ,
141136 "workflows_core.step_execution_mode" : StepExecutionMode .LOCAL ,
@@ -146,15 +141,13 @@ def test_workflow_with_ocr_prompt(
146141 max_concurrent_steps = WORKFLOWS_MAX_CONCURRENT_STEPS ,
147142 )
148143
149- # when
150144 result = execution_engine .run (
151145 runtime_parameters = {
152146 "image" : [license_plate_image ],
153147 "api_key" : GOOGLE_API_KEY ,
154148 }
155149 )
156150
157- # then
158151 assert len (result ) == 1 , "Single image given, expected single output"
159152 assert set (result [0 ].keys ()) == {"result" }, "Expected all outputs to be delivered"
160153 assert (
@@ -914,7 +907,85 @@ def test_workflow_with_object_detection_prompt(
914907 ],
915908}
916909
# Workflow definition for image captioning with Gemini where the model
# version is not hard-coded: it is taken from the `model_version` runtime
# input, next to the image and the API key. The single output `result`
# exposes the `output` of the `gemini` step.
CAPTION_WITH_VERSION_WORKFLOW = {
    "version": "1.0",
    "inputs": [
        {"type": "WorkflowImage", "name": "image"},
        {"type": "WorkflowParameter", "name": "api_key"},
        {"type": "WorkflowParameter", "name": "model_version"},
    ],
    "steps": [
        {
            "type": "roboflow_core/google_gemini@v1",
            "name": "gemini",
            "images": "$inputs.image",
            "task_type": "caption",
            "api_key": "$inputs.api_key",
            # Bound to a runtime input so one compiled workflow can be
            # executed against several model versions.
            "model_version": "$inputs.model_version",
        },
    ],
    "outputs": [
        {
            "type": "JsonField",
            "name": "result",
            "selector": "$steps.gemini.output",
        },
    ],
}
917935
@add_to_workflows_gallery(
    category="Workflows with Visual Language Models",
    use_case_title="Using different versions of Google's Gemini for Image Captioning",
    use_case_description="""
In this example, we test different Gemini model versions for image captioning.
This workflow allows specifying any supported Gemini model version as input parameter.
    """,
    workflow_definition=CAPTION_WITH_VERSION_WORKFLOW,
    workflow_name_in_app="gemini-version-captioning",
)
@pytest.mark.skipif(
    condition=GOOGLE_API_KEY is None, reason="Google API key not provided"
)
def test_workflow_with_different_gemini_versions(
    model_manager: ModelManager,
    license_plate_image: np.ndarray,
) -> None:
    """Run the captioning workflow once for every listed Gemini model version.

    The workflow is compiled a single time and then executed with the same
    image for each entry of ``model_versions``. For every version the test
    expects exactly one output containing only the ``result`` key, holding a
    non-empty string.

    Failed expectations are collected per version and reported together at
    the end, so one misbehaving version does not hide the status of the
    versions that come after it in the list. Exceptions raised by the engine
    itself are not caught and still abort the test immediately.
    """
    model_versions = [
        "gemini-1.5-flash",
        "gemini-1.5-pro",
        "gemini-2.0-flash",
        "gemini-2.0-flash-exp",
        "gemini-2.5-pro-preview-05-06",
        "gemini-2.0-flash-lite",
    ]
    workflow_init_parameters = {
        "workflows_core.model_manager": model_manager,
        "workflows_core.step_execution_mode": StepExecutionMode.LOCAL,
    }
    execution_engine = ExecutionEngine.init(
        workflow_definition=CAPTION_WITH_VERSION_WORKFLOW,
        init_parameters=workflow_init_parameters,
        max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS,
    )

    failures = []
    for version in model_versions:
        result = execution_engine.run(
            runtime_parameters={
                "image": [license_plate_image],
                "api_key": GOOGLE_API_KEY,
                "model_version": version,
            }
        )

        # Checks are ordered: each one relies on the previous one holding,
        # so only the first violated expectation per version is recorded.
        if len(result) != 1:
            failures.append(
                f"Single image given, expected single output for version {version}"
            )
            continue
        if set(result[0].keys()) != {"result"}:
            failures.append(f"Expected output key 'result' for version {version}")
            continue
        caption = result[0]["result"]
        if not (isinstance(caption, str) and len(caption) > 0):
            failures.append(
                f"Expected non-empty string generated for version {version}"
            )

    assert not failures, "\n".join(failures)
988+
918989@add_to_workflows_gallery (
919990 category = "Workflows with Visual Language Models" ,
920991 use_case_title = "Using Google's Gemini as secondary classifier" ,
@@ -963,3 +1034,4 @@ def test_workflow_with_secondary_classifier(
9631034 assert "dog" not in set (
9641035 result [0 ]["predictions" ].data ["class_name" ].tolist ()
9651036 ), "Expected classes to be substituted"
1037+
0 commit comments