Skip to content

Commit 32c9b15

Browse files
amos-qualifire, yuval-qf, coderabbitai[bot]
authored
[FIRE-987] feature: support new tool_use_quality_check & tuq_mode fields and deprecate tool_selection_quality_check & tsq_mode fields (#457)
* [FIRE-987] feature: support new tool_use_quality_check & tuq_mode fields and deprecate tool_selection_quality_check & tsq_mode fields
* [FIRE-987] ci-fix: run make codestyle
* Update qualifire/client.py
  Co-authored-by: yuval-qf <yuval@qualifire.ai>
* Update qualifire/client.py
  Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
* Update qualifire/client.py
  Co-authored-by: yuval-qf <yuval@qualifire.ai>
* [FIRE-987] pr-fix: set only tuq_mode field and deprecate the tsq_mode from the Client class
* [FIRE-987] chore: fix linting
* [FIRE-987] chore: bump up SDK version

---------

Co-authored-by: yuval-qf <yuval@qualifire.ai>
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
1 parent dbbeee0 commit 32c9b15

6 files changed

Lines changed: 33 additions & 19 deletions

File tree

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ print(f"Flagged: {result.evaluationResults[0].results[0].flagged}")
5757
| `pii_check` | Detect personally identifiable information |
5858
| `prompt_injections` | Identify prompt injection attempts |
5959
| `content_moderation_check` | Check for harmful content (harassment, hate speech, dangerous content, sexual content) |
60-
| `tool_selection_quality_check` | Evaluate quality of tool/function calls |
60+
| `tool_use_quality_check` | Evaluate quality of tool/function calls |
6161
| `syntax_checks` | Validate output syntax (JSON, SQL, etc.) |
6262
| `assertions` | Custom assertions to validate against the output |
6363

@@ -184,7 +184,7 @@ result = client.evaluate(
184184
},
185185
),
186186
],
187-
tool_selection_quality_check=True,
187+
tool_use_quality_check=True,
188188
)
189189
```
190190

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
55

66
[tool.poetry]
77
name = "qualifire"
8-
version = "0.11.1"
8+
version = "0.12.0"
99
description = "Qualifire Python SDK"
1010
readme = "README.md"
1111
authors = ["qualifire-dev <dror@qualifire.ai>"]

qualifire/client.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,11 @@ def evaluate(
5252
prompt_injections: bool = False,
5353
sexual_content_check: bool = False, # Deprecated: use content_moderation_check
5454
syntax_checks: Optional[Dict[str, SyntaxCheckArgs]] = None,
55-
tool_selection_quality_check: bool = False,
55+
tool_selection_quality_check: bool = False, # Deprecated: use tool_use_quality_check
56+
tool_use_quality_check: bool = False,
5657
content_moderation_check: bool = False,
57-
tsq_mode: ModelMode = ModelMode.BALANCED,
58+
tsq_mode: Optional[ModelMode] = None, # Deprecated: use tuq_mode
59+
tuq_mode: Optional[ModelMode] = None,
5860
consistency_mode: ModelMode = ModelMode.BALANCED,
5961
assertions_mode: ModelMode = ModelMode.BALANCED,
6062
grounding_mode: ModelMode = ModelMode.BALANCED,
@@ -69,9 +71,9 @@ def evaluate(
6971
:param input: The primary input for the evaluation.
7072
:param output: The primary output (e.g., LLM response) to evaluate.
7173
:param messages: List of message objects representing conversation history.
72-
Must be set if tool_selection_quality_check is True.
74+
Must be set if tool_use_quality_check is True.
7375
:param available_tools: List of available tools.
74-
Must be set if tool_selection_quality_check is True.
76+
Must be set if tool_use_quality_check is True.
7577
:param assertions: A list of custom assertions to check against the output.
7678
:param dangerous_content_check: .. deprecated:: Use :param:`content_moderation_check` instead.
7779
Check for dangerous content generation.
@@ -87,11 +89,16 @@ def evaluate(
8789
:param sexual_content_check: .. deprecated:: Use :param:`content_moderation_check` instead.
8890
Check for sexually explicit content.
8991
:param syntax_checks: Dictionary defining syntax checks (e.g., JSON, SQL).
90-
:param tool_selection_quality_check: Check for tool selection quality.
92+
:param tool_selection_quality_check: .. deprecated:: Use :param:`tool_use_quality_check` instead.
93+
Check for tool selection quality.
94+
Only works when `available_tools` and `messages` are provided.
95+
:param tool_use_quality_check: Check for tool use quality
9196
Only works when `available_tools` and `messages` are provided.
9297
:param content_moderation_check: Check for content moderation (dangerous content,
9398
harassment, hate speech, and sexual content).
94-
:param tsq_mode: Model mode for tool selection quality check (speed/balanced/quality).
99+
:param tsq_mode: .. deprecated:: Use :param:`tuq_mode` instead.
100+
Model mode for tool selection quality check (speed/balanced/quality).
101+
:param tuq_mode: Model mode for tool use quality check (speed/balanced/quality).
95102
:param consistency_mode: Model mode for consistency check (speed/balanced/quality).
96103
:param assertions_mode: Model mode for assertions check (speed/balanced/quality).
97104
:param grounding_mode: Model mode for grounding check (speed/balanced/quality).
@@ -207,9 +214,10 @@ def evaluate(
207214
prompt_injections=prompt_injections,
208215
sexual_content_check=sexual_content_check,
209216
syntax_checks=syntax_checks,
210-
tool_selection_quality_check=tool_selection_quality_check,
217+
tool_use_quality_check=tool_use_quality_check
218+
or tool_selection_quality_check,
211219
content_moderation_check=content_moderation_check,
212-
tsq_mode=tsq_mode,
220+
tuq_mode=tuq_mode if tuq_mode else tsq_mode,
213221
consistency_mode=consistency_mode,
214222
assertions_mode=assertions_mode,
215223
grounding_mode=grounding_mode,

qualifire/types.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,11 @@ class EvaluationRequest:
5858
grounding_check: bool = False
5959
syntax_checks: Optional[Dict[str, SyntaxCheckArgs]] = None
6060
assertions: Optional[List[str]] = field(default_factory=list)
61-
tool_selection_quality_check: bool = False
61+
tool_selection_quality_check: bool = False # Deprecated: use tool_use_quality_check
62+
tool_use_quality_check: bool = False
6263
content_moderation_check: bool = False
63-
tsq_mode: ModelMode = ModelMode.BALANCED
64+
tsq_mode: Optional[ModelMode] = None # Deprecated: use tuq_mode
65+
tuq_mode: Optional[ModelMode] = None
6466
consistency_mode: ModelMode = ModelMode.BALANCED
6567
assertions_mode: ModelMode = ModelMode.BALANCED
6668
grounding_mode: ModelMode = ModelMode.BALANCED
@@ -81,15 +83,19 @@ def _validate_messages_input_output(self):
8183
)
8284

8385
def _validate_tsq_requirements(self):
84-
if self.tool_selection_quality_check and not self.messages:
86+
if (
87+
self.tool_selection_quality_check or self.tool_use_quality_check
88+
) and not self.messages:
8589
raise ValueError(
8690
"messages must be provided in conjunction "
87-
"with tool_selection_quality_check=True."
91+
"with tool_use_quality_check=True."
8892
)
89-
if self.tool_selection_quality_check and not self.available_tools:
93+
if (
94+
self.tool_selection_quality_check or self.tool_use_quality_check
95+
) and not self.available_tools:
9096
raise ValueError(
9197
"available_tools must be provided in conjunction "
92-
"with tool_selection_quality_check=True."
98+
"with tool_use_quality_check=True."
9399
)
94100

95101
def _handle_deprecated_content_checks(self):

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
if __name__ == "__main__":
55
setup(
66
name="qualifire",
7-
version="0.11.0",
7+
version="0.12.0",
88
description="Qualifire Python SDK",
99
author="qualifire-dev",
1010
author_email="dror@qualifire.ai",

tests/test_types.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,5 +109,5 @@ def test_validate_tsq_requirements(
109109
input="input", # To pass the messages-input-output validation
110110
messages=messages,
111111
available_tools=available_tools,
112-
tool_selection_quality_check=tsq_check,
112+
tool_use_quality_check=tsq_check,
113113
)

0 commit comments

Comments
 (0)