11import os
2- from unittest .mock import MagicMock , patch
2+ from unittest .mock import MagicMock
33
44import pytest
55from haystack import Document , Pipeline
@@ -34,13 +34,6 @@ def score(self, **kwargs) -> MetricResult:
3434 return MetricResult (value = 1.0 , reason = "test" )
3535
3636
37- def make_llm_mock (model : str = "gpt-4o-mini" , provider : str = "openai" ) -> MagicMock :
38- llm = MagicMock ()
39- llm .model = model
40- llm .provider = provider
41- return llm
42-
43-
4437def make_metric (name : str , score : float = 0.8 , reason : str = "test reason" ) -> MagicMock :
4538 """Create a mock SimpleBaseMetric with a concrete ascore signature for inspect.signature."""
4639 metric = MagicMock (spec = SimpleBaseMetric )
@@ -54,29 +47,33 @@ async def ascore(user_input: str, response: str, retrieved_contexts: list) -> Me
5447 return metric
5548
5649
57- class TestInitialization :
58- def test_successful_initialization (self ):
59- metric = make_metric ("faithfulness" )
50+ class TestInit :
51+ def test_init (self , monkeypatch ):
52+ monkeypatch .setenv ("OPENAI_API_KEY" , "test" )
53+ metric = Faithfulness (llm = llm_factory ("gpt-4o-mini" , client = AsyncOpenAI ()))
6054 evaluator = RagasEvaluator (ragas_metrics = [metric ])
6155 assert evaluator .metrics == [metric ]
6256
63- def test_initialization_with_multiple_metrics (self ):
64- metrics = [make_metric ("faithfulness" ), make_metric ("answer_relevancy" )]
57+ def test_init_with_multiple_metrics (self , monkeypatch ):
58+ monkeypatch .setenv ("OPENAI_API_KEY" , "test" )
59+ llm = llm_factory ("gpt-4o-mini" , client = AsyncOpenAI ())
60+ metrics = [
61+ Faithfulness (llm = llm ),
62+ AnswerRelevancy (
63+ llm = llm ,
64+ embeddings = embedding_factory ("openai" , model = "text-embedding-3-small" , client = AsyncOpenAI ())
65+ ),
66+ ]
6567 evaluator = RagasEvaluator (ragas_metrics = metrics )
6668 assert len (evaluator .metrics ) == 2
6769
6870 def test_invalid_metrics_raises_type_error (self ):
6971 with pytest .raises (TypeError , match = "All items in ragas_metrics must be instances of SimpleBaseMetric." ):
7072 RagasEvaluator (ragas_metrics = ["not_a_metric" ])
7173
72- def test_invalid_metrics_mixed_raises_type_error (self ):
73- valid = make_metric ("faithfulness" )
74- with pytest .raises (TypeError ):
75- RagasEvaluator (ragas_metrics = [valid , "not_a_metric" ])
76-
7774
7875class TestRun :
79- def test_run_returns_metric_results_keyed_by_name (self ):
76+ def test_run_returns_result_by_metric_name (self , monkeypatch ):
8077 metric = make_metric ("faithfulness" , score = 0.9 )
8178 evaluator = RagasEvaluator (ragas_metrics = [metric ])
8279 output = evaluator .run (
@@ -172,10 +169,10 @@ def test_run_raises_on_invalid_input_types(self, invalid_input, field_name, erro
172169
173170
174171class TestSerialization :
175- def test_to_dict (self ):
176- evaluator = RagasEvaluator (
177- ragas_metrics = [ ConcreteMetric ( llm = make_llm_mock ()), ConcreteMetric ( name = "another_metric" )]
178- )
172+ def test_to_dict (self , monkeypatch ):
173+ monkeypatch . setenv ( "OPENAI_API_KEY" , "test" )
174+ llm = llm_factory ( "gpt-4o-mini" , client = AsyncOpenAI ())
175+ evaluator = RagasEvaluator ( ragas_metrics = [ ConcreteMetric ( llm = llm ), ConcreteMetric ( name = "another_metric" )] )
179176 data = evaluator .to_dict ()
180177 assert data == {
181178 "type" : "haystack_integrations.components.evaluators.ragas.evaluator.RagasEvaluator" ,
0 commit comments