Skip to content

Commit 282289d

Browse files
committed
DeepEval cache miss fixed
1 parent 17da61a commit 282289d

3 files changed

Lines changed: 17 additions & 0 deletions

File tree

src/lightspeed_evaluation/core/llm/deepeval.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,16 @@
44
This ensures DeepEval's LiteLLMModel uses the patched completion functions.
55
"""
66

7+
import asyncio
8+
import logging
79
import os
810
from typing import Any
911

1012
import litellm
1113
from deepeval.models import LiteLLMModel
1214

15+
logger = logging.getLogger(__name__)
16+
1317

1418
class DeepEvalLLMManager:
1519
"""DeepEval LLM Manager - Takes LLM parameters directly.
@@ -68,3 +72,11 @@ def get_model_info(self) -> dict[str, Any]:
6872
"timeout": self.llm_params.get("timeout"),
6973
"num_retries": self.llm_params.get("num_retries", 3),
7074
}
75+
76+
@staticmethod
def flush_deepevals_pending_tasks() -> None:
    """Flush background tasks left pending by DeepEval's async_mode.

    Drives a no-op coroutine (``asyncio.sleep(0)``) through ``asyncio.run``.
    This is a best-effort operation: if ``asyncio.run`` raises
    ``RuntimeError`` (for example because it is invoked from inside an
    already-running event loop), the error is logged at DEBUG level and
    swallowed so that metric evaluation is never interrupted by the flush.

    NOTE(review): whether this actually drains tasks DeepEval scheduled on
    its own loop depends on DeepEval internals not visible here - confirm.
    """
    # Build the coroutine up front so it can be closed explicitly below.
    # If asyncio.run() refuses to start, the coroutine would otherwise be
    # garbage-collected un-awaited and emit
    # "RuntimeWarning: coroutine 'sleep' was never awaited".
    noop = asyncio.sleep(0)
    try:
        asyncio.run(noop)
    except RuntimeError as e:
        logger.debug("Could not flush DeepEval pending tasks: %s", e)
    finally:
        # No-op when the coroutine already ran to completion.
        noop.close()

src/lightspeed_evaluation/core/metrics/deepeval.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ def _build_conversational_test_case(self, conv_data: Any) -> ConversationalTestC
8686
def _evaluate_metric(self, metric: Any, test_case: Any) -> tuple[float, str]:
8787
"""Evaluate and get result."""
8888
metric.measure(test_case)
89+
self.llm_manager.flush_deepevals_pending_tasks()
8990

9091
reason = (
9192
metric.reason

src/lightspeed_evaluation/core/metrics/geval.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,8 @@ def _evaluate_turn( # pylint: disable=R0913,R0917
295295
# Evaluate (DeepEval normalizes score to [0, 1]; pass through as-is)
296296
try:
297297
metric.measure(test_case)
298+
self.deepeval_llm_manager.flush_deepevals_pending_tasks()
299+
298300
score = metric.score if metric.score is not None else 0.0
299301
reason = (
300302
str(metric.reason)
@@ -402,6 +404,8 @@ def _evaluate_conversation( # pylint: disable=R0913,R0917,R0914
402404
# Evaluate (DeepEval normalizes score to [0, 1]; pass through as-is)
403405
try:
404406
metric.measure(test_case)
407+
self.deepeval_llm_manager.flush_deepevals_pending_tasks()
408+
405409
score = metric.score if metric.score is not None else 0.0
406410
reason = (
407411
str(metric.reason)

0 commit comments

Comments
 (0)