Skip to content

Commit 8577d09

Browse files
authored
fix: default intrinsic adapter types; add tests for granite-switch; fix expected canned input/output with temperature (#994)
Assisted-by: CLAUDE:OPUS Signed-off-by: Jake LoRocco <jake.lorocco@ibm.com>
1 parent b433d8f commit 8577d09

7 files changed

Lines changed: 220 additions & 31 deletions

File tree

mellea/backends/adapters/catalog.py

Lines changed: 5 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -78,31 +78,15 @@ class IntriniscsCatalogEntry(pydantic.BaseModel):
7878
IntriniscsCatalogEntry(name="citations", repo_id=_RAG_REPO),
7979
IntriniscsCatalogEntry(name="context_relevance", repo_id=_RAG_REPO),
8080
IntriniscsCatalogEntry(name="hallucination_detection", repo_id=_RAG_REPO),
81-
IntriniscsCatalogEntry(
82-
name="query_clarification", repo_id=_RAG_REPO, adapter_types=(AdapterType.LORA,)
83-
),
81+
IntriniscsCatalogEntry(name="query_clarification", repo_id=_RAG_REPO),
8482
IntriniscsCatalogEntry(name="query_rewrite", repo_id=_RAG_REPO),
8583
############################################
8684
# Guardian Intrinsics
8785
############################################
88-
IntriniscsCatalogEntry(
89-
name="policy-guardrails",
90-
repo_id=_GUARDIAN_REPO,
91-
adapter_types=(AdapterType.LORA,),
92-
),
93-
IntriniscsCatalogEntry(
94-
name="guardian-core", repo_id=_GUARDIAN_REPO, adapter_types=(AdapterType.LORA,)
95-
),
96-
IntriniscsCatalogEntry(
97-
name="factuality-detection",
98-
repo_id=_GUARDIAN_REPO,
99-
adapter_types=(AdapterType.LORA,),
100-
),
101-
IntriniscsCatalogEntry(
102-
name="factuality-correction",
103-
repo_id=_GUARDIAN_REPO,
104-
adapter_types=(AdapterType.LORA,),
105-
),
86+
IntriniscsCatalogEntry(name="policy-guardrails", repo_id=_GUARDIAN_REPO),
87+
IntriniscsCatalogEntry(name="guardian-core", repo_id=_GUARDIAN_REPO),
88+
IntriniscsCatalogEntry(name="factuality-detection", repo_id=_GUARDIAN_REPO),
89+
IntriniscsCatalogEntry(name="factuality-correction", repo_id=_GUARDIAN_REPO),
10690
]
10791

10892
_INTRINSICS_CATALOG = {e.name: e for e in _INTRINSICS_CATALOG_ENTRIES}

test/backends/test_adapters/test_catalog.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,16 @@ def test_default_adapter_types():
5454
assert AdapterType.ALORA in entry.adapter_types
5555

5656

57-
def test_lora_only_entry():
57+
def test_lora_only_entry(monkeypatch):
    """An entry declared with only LoRA support reports exactly that adapter type.

    The module-level ``_INTRINSICS_CATALOG`` is replaced (via ``monkeypatch``)
    with a single LoRA-only entry, so the assertion is made against that fake
    entry rather than against whatever the real catalog declares.
    """
    from mellea.backends.adapters import catalog

    lora_only = catalog.IntriniscsCatalogEntry(
        name="query_clarification",
        repo_id="ibm-granite/granitelib-rag-r1.0",
        adapter_types=(AdapterType.LORA,),
    )
    patched_catalog = {"query_clarification": lora_only}
    monkeypatch.setattr(catalog, "_INTRINSICS_CATALOG", patched_catalog)

    entry = fetch_intrinsic_metadata("query_clarification")
    assert entry.adapter_types == (AdapterType.LORA,)

test/backends/test_openai_intrinsics.py

Lines changed: 198 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
from mellea.stdlib import functional as mfuncs
4444
from mellea.stdlib.components import Intrinsic, Message
4545
from mellea.stdlib.components.docs.document import Document
46-
from mellea.stdlib.components.intrinsic import rag
46+
from mellea.stdlib.components.intrinsic import core as intrinsic_core, guardian, rag
4747
from mellea.stdlib.context import ChatContext
4848
from test.formatters.granite.test_intrinsics_formatters import (
4949
_YAML_JSON_COMBOS_WITH_MODEL,
@@ -355,13 +355,21 @@ def test_call_intrinsic_answerability(call_intrinsic_backend):
355355

356356

357357
@pytest.mark.qualitative
358-
def test_call_intrinsic_context_relevance(call_intrinsic_backend):
359-
"""call_intrinsic path: check_context_relevance returns a score between 0 and 1."""
360-
context, question, documents = _read_rag_input("context_relevance.json")
361-
result = rag.check_context_relevance(
362-
question, documents[0], context, call_intrinsic_backend
358+
def test_call_intrinsic_requirement_check(call_intrinsic_backend):
359+
"""call_intrinsic path: requirement_check returns a score between 0 and 1."""
360+
with open(_RAG_TEST_DATA / "requirement_check.json", encoding="utf-8") as f:
361+
data = json.load(f)
362+
363+
context = ChatContext()
364+
for m in data["messages"]:
365+
context = context.add(Message(m["role"], m["content"]))
366+
367+
requirement = data["requirement"]
368+
result = intrinsic_core.requirement_check(
369+
context, call_intrinsic_backend, requirement=requirement
363370
)
364-
assert result in ["relevant", "irrelevant", "partially relevant"]
371+
assert isinstance(result, float)
372+
assert 0.0 <= result <= 1.0
365373

366374

367375
# ---------------------------------------------------------------------------
@@ -399,3 +407,186 @@ def get_temperature(location: str) -> int:
399407
assert len(result.value) > 0
400408
parsed = json.loads(result.value)
401409
assert isinstance(parsed, dict)
410+
411+
412+
# ---------------------------------------------------------------------------
413+
# Guardian intrinsic tests — exercise the high-level convenience wrappers
414+
# ---------------------------------------------------------------------------
415+
416+
# Directory holding the JSON inputs used by the guardian intrinsic tests,
# resolved relative to this file: two directory levels up, then
# stdlib/components/intrinsic/testdata/input_json.
_GUARDIAN_TEST_DATA = pathlib.Path(__file__).parent.parent.joinpath(
    "stdlib", "components", "intrinsic", "testdata", "input_json"
)
424+
425+
426+
def _read_guardian_input(file_name: str) -> ChatContext:
    """Load a guardian test-input file and replay its messages into a ChatContext.

    Args:
        file_name: Name of a JSON file inside ``_GUARDIAN_TEST_DATA``.

    Returns:
        A ``ChatContext`` holding one ``Message`` per entry of the file's
        ``"messages"`` list, added in file order.
    """
    raw_text = (_GUARDIAN_TEST_DATA / file_name).read_text(encoding="utf-8")
    payload = json.loads(raw_text)

    chat = ChatContext()
    for entry in payload["messages"]:
        chat = chat.add(Message(entry["role"], entry["content"]))
    return chat
438+
439+
440+
@pytest.mark.qualitative
def test_call_intrinsic_policy_guardrails(call_intrinsic_backend):
    """call_intrinsic path: policy_guardrails answers with one of its three labels.

    The conversation comes from ``policy_guardrails.json``; the policy text is
    supplied inline. The result must be "Yes", "No" or "Ambiguous".
    """
    policy_text = (
        "hiring managers should steer away from any questions that directly seek "
        'information about protected classes\u2014such as "how old are you," "where are '
        'you from," "what year did you graduate" or "what are your plans for having kids."'
    )
    conversation = _read_guardian_input("policy_guardrails.json")

    verdict = guardian.policy_guardrails(
        conversation, call_intrinsic_backend, policy_text=policy_text
    )
    assert verdict in ("Yes", "No", "Ambiguous")
455+
456+
457+
@pytest.mark.qualitative
def test_call_intrinsic_guardian_check_harm(call_intrinsic_backend):
    """call_intrinsic path: guardian_check with the "harm" criteria returns a score.

    The conversation is read from ``guardian_core.json`` and the check targets
    the ``"user"`` role; the result must be a float in [0, 1].
    """
    conversation = _read_guardian_input("guardian_core.json")

    score = guardian.guardian_check(
        conversation,
        call_intrinsic_backend,
        criteria="harm",
        target_role="user",
    )
    assert isinstance(score, float)
    assert 0.0 <= score <= 1.0
467+
468+
469+
@pytest.mark.qualitative
def test_call_intrinsic_guardian_check_groundedness(call_intrinsic_backend):
    """call_intrinsic path: guardian_check with "groundedness" returns a score.

    The assistant turn carries one supporting document but states a date that
    differs from the one in that document. The test asserts only that the
    result is a float in [0, 1].
    """
    source_text = (
        "Eat (1964) is a 45-minute underground film created by Andy Warhol. "
        "The film was first shown by Jonas Mekas on July 16, 1964, at the "
        "Washington Square Gallery."
    )
    document = Document(text=source_text, doc_id="0")

    question = Message("user", "When was the film Eat first shown?")
    answer = Message(
        "assistant",
        "The film Eat was first shown by Jonas Mekas on December 24, "
        "1922 at the Washington Square Gallery.",
        documents=[document],
    )
    conversation = ChatContext().add(question).add(answer)

    score = guardian.guardian_check(
        conversation, call_intrinsic_backend, criteria="groundedness"
    )
    assert isinstance(score, float)
    assert 0.0 <= score <= 1.0
499+
500+
501+
@pytest.mark.qualitative
def test_call_intrinsic_guardian_check_function_call(call_intrinsic_backend):
    """call_intrinsic path: guardian_check with "function_call" returns a score.

    The user turn advertises a single ``comment_list`` tool and the assistant
    turn calls it with an argument name the tool does not declare. The test
    asserts only that the result is a float in [0, 1].
    """
    comment_list_tool = {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
    tools_text = "Available tools:\n" + json.dumps([comment_list_tool], indent=2)
    user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."

    # Deliberately wrong: uses "video_id" instead of "aweme_id"
    bad_call = {
        "name": "comment_list",
        "arguments": {"video_id": 456789123, "count": 15},
    }
    response_text = str([bad_call])

    user_turn = Message("user", f"{tools_text}\n\n{user_text}")
    assistant_turn = Message("assistant", response_text)
    conversation = ChatContext().add(user_turn).add(assistant_turn)

    score = guardian.guardian_check(
        conversation, call_intrinsic_backend, criteria="function_call"
    )
    assert isinstance(score, float)
    assert 0.0 <= score <= 1.0
545+
546+
547+
def _read_guardian_input_with_documents(file_name: str) -> ChatContext:
    """Read a guardian test input, attaching its documents to the final message.

    Args:
        file_name: Name of a JSON file inside ``_GUARDIAN_TEST_DATA``.

    Returns:
        A ``ChatContext`` with one ``Message`` per entry of the file's
        ``"messages"`` list. Documents listed under ``extra_body.documents``
        are passed to the last message only; when that list is missing or
        empty, no message receives a ``documents`` argument.
    """
    with open(_GUARDIAN_TEST_DATA / file_name, encoding="utf-8") as f:
        data = json.load(f)

    docs = [
        Document(text=d["text"], doc_id=d.get("doc_id"))
        for d in data.get("extra_body", {}).get("documents", [])
    ]
    messages = data["messages"]
    last_index = len(messages) - 1

    context = ChatContext()
    for i, m in enumerate(messages):
        if docs and i == last_index:
            context = context.add(Message(m["role"], m["content"], documents=docs))
        else:
            context = context.add(Message(m["role"], m["content"]))
    return context


@pytest.mark.qualitative
def test_call_intrinsic_factuality_detection(call_intrinsic_backend):
    """call_intrinsic path: factuality_detection returns a yes/no label."""
    context = _read_guardian_input_with_documents("factuality_detection.json")

    result = guardian.factuality_detection(context, call_intrinsic_backend)
    assert result in ("yes", "no")


@pytest.mark.qualitative
def test_call_intrinsic_factuality_correction(call_intrinsic_backend):
    """call_intrinsic path: factuality_correction returns a string result."""
    context = _read_guardian_input_with_documents("factuality_correction.json")

    result = guardian.factuality_correction(context, call_intrinsic_backend)
    assert isinstance(result, str)

test/formatters/granite/testdata/test_canned_input/answerability_answerable.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,6 @@
2626
}
2727
}
2828
},
29+
"temperature": 0.0,
2930
"max_completion_tokens": 6
3031
}

test/formatters/granite/testdata/test_canned_input/answerability_simple.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,6 @@
1616
}
1717
}
1818
},
19-
"max_completion_tokens": 6
19+
"max_completion_tokens": 6,
20+
"temperature": 0.0
2021
}

test/formatters/granite/testdata/test_canned_input/answerability_unanswerable.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,6 @@
3030
}
3131
}
3232
},
33+
"temperature": 0.0,
3334
"max_completion_tokens": 6
3435
}

test/formatters/granite/testdata/test_canned_input/context_relevance.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,6 @@
3030
]
3131
}
3232
}
33-
}
33+
},
34+
"temperature": 0.0
3435
}

0 commit comments

Comments
 (0)