[FIRE-987] feature: support new tool_use_quality_check & tuq_mode fields and deprecate tool_selection_quality_check & tsq_mode fields (#457)

amos-qualifire · yuval-qf · coderabbitai[bot] · web-flow · commit 32c9b155bd0f · 2026-01-07T16:35:19.000+02:00
* [FIRE-987] feature: support new tool_use_quality_check & tuq_mode fields and deprecate tool_selection_quality_check & tsq_mode fields

* [FIRE-987] ci-fix: run make codestyle

* Update qualifire/client.py

Co-authored-by: yuval-qf <yuval@qualifire.ai>

* Update qualifire/client.py

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>

* Update qualifire/client.py

Co-authored-by: yuval-qf <yuval@qualifire.ai>

* [FIRE-987] pr-fix: set only tuq_mode field and deprecate the tsq_mode from the Client class

* [FIRE-987] chore: fix linting

* [FIRE-987] chore: bump up SDK version

---------

Co-authored-by: yuval-qf <yuval@qualifire.ai>
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
diff --git a/README.md b/README.md
@@ -57,7 +57,7 @@ print(f"Flagged: {result.evaluationResults[0].results[0].flagged}")
 | `pii_check` | Detect personally identifiable information |
 | `prompt_injections` | Identify prompt injection attempts |
 | `content_moderation_check` | Check for harmful content (harassment, hate speech, dangerous content, sexual content) |
-| `tool_selection_quality_check` | Evaluate quality of tool/function calls |
+| `tool_use_quality_check` | Evaluate quality of tool/function calls |
 | `syntax_checks` | Validate output syntax (JSON, SQL, etc.) |
 | `assertions` | Custom assertions to validate against the output |
 
@@ -184,7 +184,7 @@ result = client.evaluate(
             },
         ),
     ],
-    tool_selection_quality_check=True,
+    tool_use_quality_check=True,
 )
 ```
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "qualifire"
-version = "0.11.1"
+version = "0.12.0"
 description = "Qualifire Python SDK"
 readme = "README.md"
 authors = ["qualifire-dev <dror@qualifire.ai>"]
diff --git a/qualifire/client.py b/qualifire/client.py
@@ -52,9 +52,11 @@ def evaluate(
         prompt_injections: bool = False,
         sexual_content_check: bool = False,  # Deprecated: use content_moderation_check
         syntax_checks: Optional[Dict[str, SyntaxCheckArgs]] = None,
-        tool_selection_quality_check: bool = False,
+        tool_selection_quality_check: bool = False,  # Deprecated: use tool_use_quality_check
+        tool_use_quality_check: bool = False,
         content_moderation_check: bool = False,
-        tsq_mode: ModelMode = ModelMode.BALANCED,
+        tsq_mode: Optional[ModelMode] = None,  # Deprecated: use tuq_mode
+        tuq_mode: Optional[ModelMode] = None,
         consistency_mode: ModelMode = ModelMode.BALANCED,
         assertions_mode: ModelMode = ModelMode.BALANCED,
         grounding_mode: ModelMode = ModelMode.BALANCED,
@@ -69,9 +71,9 @@ def evaluate(
         :param input: The primary input for the evaluation.
         :param output: The primary output (e.g., LLM response) to evaluate.
         :param messages: List of message objects representing conversation history.
-            Must be set if tool_selection_quality_check is True.
+            Must be set if tool_use_quality_check is True.
         :param available_tools: List of available tools.
-            Must be set if tool_selection_quality_check is True.
+            Must be set if tool_use_quality_check is True.
         :param assertions: A list of custom assertions to check against the output.
         :param dangerous_content_check: .. deprecated:: Use :param:`content_moderation_check` instead.
             Check for dangerous content generation.
@@ -87,11 +89,16 @@ def evaluate(
         :param sexual_content_check: .. deprecated:: Use :param:`content_moderation_check` instead.
             Check for sexually explicit content.
         :param syntax_checks: Dictionary defining syntax checks (e.g., JSON, SQL).
-        :param tool_selection_quality_check: Check for tool selection quality.
+        :param tool_selection_quality_check: .. deprecated:: Use :param:`tool_use_quality_check` instead.
+            Check for tool selection quality.
+            Only works when `available_tools` and `messages` are provided.
+        :param tool_use_quality_check: Check for tool use quality
             Only works when `available_tools` and `messages` are provided.
         :param content_moderation_check: Check for content moderation (dangerous content,
             harassment, hate speech, and sexual content).
-        :param tsq_mode: Model mode for tool selection quality check (speed/balanced/quality).
+        :param tsq_mode: .. deprecated:: Use :param:`tuq_mode` instead.
+            Model mode for tool selection quality check (speed/balanced/quality).
+        :param tuq_mode: Model mode for tool use quality check (speed/balanced/quality).
         :param consistency_mode: Model mode for consistency check (speed/balanced/quality).
         :param assertions_mode: Model mode for assertions check (speed/balanced/quality).
         :param grounding_mode: Model mode for grounding check (speed/balanced/quality).
@@ -207,9 +214,10 @@ def evaluate(
             prompt_injections=prompt_injections,
             sexual_content_check=sexual_content_check,
             syntax_checks=syntax_checks,
-            tool_selection_quality_check=tool_selection_quality_check,
+            tool_use_quality_check=tool_use_quality_check
+            or tool_selection_quality_check,
             content_moderation_check=content_moderation_check,
-            tsq_mode=tsq_mode,
+            tuq_mode=tuq_mode if tuq_mode else tsq_mode,
             consistency_mode=consistency_mode,
             assertions_mode=assertions_mode,
             grounding_mode=grounding_mode,
diff --git a/qualifire/types.py b/qualifire/types.py
@@ -58,9 +58,11 @@ class EvaluationRequest:
     grounding_check: bool = False
     syntax_checks: Optional[Dict[str, SyntaxCheckArgs]] = None
     assertions: Optional[List[str]] = field(default_factory=list)
-    tool_selection_quality_check: bool = False
+    tool_selection_quality_check: bool = False  # Deprecated: use tool_use_quality_check
+    tool_use_quality_check: bool = False
     content_moderation_check: bool = False
-    tsq_mode: ModelMode = ModelMode.BALANCED
+    tsq_mode: Optional[ModelMode] = None  # Deprecated: use tuq_mode
+    tuq_mode: Optional[ModelMode] = None
     consistency_mode: ModelMode = ModelMode.BALANCED
     assertions_mode: ModelMode = ModelMode.BALANCED
     grounding_mode: ModelMode = ModelMode.BALANCED
@@ -81,15 +83,19 @@ def _validate_messages_input_output(self):
             )
 
     def _validate_tsq_requirements(self):
-        if self.tool_selection_quality_check and not self.messages:
+        if (
+            self.tool_selection_quality_check or self.tool_use_quality_check
+        ) and not self.messages:
             raise ValueError(
                 "messages must be provided in conjunction "
-                "with tool_selection_quality_check=True."
+                "with tool_use_quality_check=True."
             )
-        if self.tool_selection_quality_check and not self.available_tools:
+        if (
+            self.tool_selection_quality_check or self.tool_use_quality_check
+        ) and not self.available_tools:
             raise ValueError(
                 "available_tools must be provided in conjunction "
-                "with tool_selection_quality_check=True."
+                "with tool_use_quality_check=True."
             )
 
     def _handle_deprecated_content_checks(self):
diff --git a/setup.py b/setup.py
@@ -4,7 +4,7 @@
 if __name__ == "__main__":
     setup(
         name="qualifire",
-        version="0.11.0",
+        version="0.12.0",
         description="Qualifire Python SDK",
         author="qualifire-dev",
         author_email="dror@qualifire.ai",
diff --git a/tests/test_types.py b/tests/test_types.py
@@ -109,5 +109,5 @@ def test_validate_tsq_requirements(
                 input="input",  # To pass the messages-input-output validation
                 messages=messages,
                 available_tools=available_tools,
-                tool_selection_quality_check=tsq_check,
+                tool_use_quality_check=tsq_check,
             )

Original file line number	Diff line number	Diff line change
`@@ -109,5 +109,5 @@ def test_validate_tsq_requirements(`
`109`	`109`	`input="input", # To pass the messages-input-output validation`
`110`	`110`	`messages=messages,`
`111`	`111`	`available_tools=available_tools,`
`112`		`- tool_selection_quality_check=tsq_check,`
	`112`	`+ tool_use_quality_check=tsq_check,`
`113`	`113`	`)`