diff --git a/config/system.yaml b/config/system.yaml index ef44a0cb..7ae81de6 100644 --- a/config/system.yaml +++ b/config/system.yaml @@ -6,7 +6,7 @@ core: fail_on_invalid_data: true # If False don't fail on invalid conversations (like missing context for some metrics) skip_on_failure: false # If True, skip remaining turns when a turn evaluation fails (can be overridden per conversation) cache_enabled: true # Global cache toggle, if True LLM as a judge, embeddings and API queries are cached - cache_base_dir: .caches # Global base cache dir (queries cached separately under /llm (LLM as a judge + embeddings) and /api) + cache_base_dir: .caches # Global base cache dir (queries cached separately under /llm (LLM as a judge + embeddings) and /agent for agent API calls) # LLM as a judge configuration (Legacy) # Deprecated: top-level llm: (single judge) will be removed — use llm_pool + judge_panel only. diff --git a/docs/configuration.md b/docs/configuration.md index 05f86ce6..2b1e8fa7 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -8,13 +8,17 @@ The system configuration is driven by YAML file. The default config file is [con | max_threads | `50` | Maximum number of threads, set to null for Python default. 50 is OK on a typical laptop. Check your Judge-LLM service for max requests per minute | | fail_on_invalid_data | `true` | If `false` don't fail on invalid conversations (like missing `context` field for some metrics) | | skip_on_failure | `false` | If `true`, skip remaining turns and conversation metrics when a turn evaluation fails (FAIL or ERROR). Can be overridden per conversation in the input data yaml file. | +| cache_enabled | `true` | Global caching toggle for embeddings, agent API, and LLM judge queries. (_Component-level cache settings are deprecated._) | +| cache_base_dir | `".caches"` | Base directory for all evaluation caches (embeddings, agent, LLM judge). 
Component-specific subdirectories are appended automatically (`/llm` for LLM-as-a-judge and embeddings, `/agent` for agent API calls). | ### Example ```yaml core: max_threads: 50 fail_on_invalid_data: true - skip_on_failure: false # Set to true to stop evaluation on first failure + skip_on_failure: false # Set to true to stop evaluation on first failure + cache_enabled: true # Global cache toggle (affects all components) + cache_base_dir: ".caches" # Base cache directory ``` ## LLM Pool @@ -25,7 +29,7 @@ Define a centralized pool of LLM configurations for the Judge Panel feature. | Setting | Description | |---------|-------------| -| `llm_pool.defaults.cache_dir` | Cache directory (default: `.caches/llm_cache`) | +| `llm_pool.defaults.cache_dir` | Cache directory (default: `.caches/llm_cache`) (_deprecated - use `core.cache_base_dir`_) | | `llm_pool.defaults.timeout` | Request timeout in seconds (default: `300`) | | `llm_pool.defaults.num_retries` | Retry attempts (default: `3`) | | `llm_pool.defaults.parameters.temperature` | Sampling temperature | @@ -113,8 +117,8 @@ This section configures LLM. It is used when `judge_panel` is not configured (ev | max_tokens | `512` | Maximum tokens in response | | timeout | `300` | Request timeout in seconds | | num_retries | `3` | Maximum retry attempts | -| cache_dir | `".caches/llm_cache"` | Directory with cached LLM responses | -| cache_enabled | `true` | Is LLM cache enabled? | +| cache_dir | `".caches/llm_cache"` | Directory with cached LLM responses (_deprecated - use `core.cache_base_dir`_) | +| cache_enabled | `true` | Is LLM cache enabled? (_deprecated - use `core.cache_enabled`_) | Dynamic LLM parameters are only supported through `llm_pool` config. To use dynamic parameters, migrate to `llm_pool`. @@ -128,8 +132,8 @@ Some Ragas metrics use embeddings to compute similarity between generated answer | provider | `"openai"` | Supported providers: `"openai"`, `"gemini"` or `"huggingface"`. 
`"huggingface"` downloads the model to the local machine and runs inference locally (requires optional dependencies). | | model | `"text-embedding-3-small"` | Model name for the provider | | provider_kwargs | `{}` | Optional arguments for the model | -| cache_dir | `".caches/embedding_cache"` | Directory with cached embeddings | -| cache_enabled | `true` | Is embeddings cache enabled? | +| cache_dir | `".caches/embedding_cache"` | Directory with cached embeddings (_deprecated - use `core.cache_base_dir`_) | +| cache_enabled | `true` | Is embeddings cache enabled? (_deprecated - use `core.cache_enabled`_) | #### Remote vs Local Embedding Models @@ -201,8 +205,8 @@ Note that it can be easily integrated with other APIs with a minimal change. | model | `"gpt-4o-mini"` | Model to use for API queries (optional) | | no_tools | `null` | Whether to bypass tools (optional) | | system_prompt | `null` | Custom system prompt (optional) | -| cache_dir | `".caches/api_cache"` | Directory with cached API responses | -| cache_enabled | `true` | Is API cache enabled? | +| cache_dir | `".caches/api_cache"` | Directory with cached API responses (_deprecated - use `core.cache_base_dir`_) | +| cache_enabled | `true` | Is API cache enabled? 
(_deprecated - use `core.cache_enabled`_) | | mcp_headers | `null` | MCP headers configuration for authentication (see below) | | num_retries | `3` | Maximum number of retry attempts for API calls on 429 errors | diff --git a/src/lightspeed_evaluation/core/constants.py b/src/lightspeed_evaluation/core/constants.py index 5d369e23..9ca59f5d 100644 --- a/src/lightspeed_evaluation/core/constants.py +++ b/src/lightspeed_evaluation/core/constants.py @@ -52,7 +52,7 @@ # Cache configuration DEFAULT_CACHE_BASE_DIR = ".caches" -DEFAULT_API_CACHE_SUBDIR = "api" +DEFAULT_AGENT_CACHE_SUBDIR = "agent" DEFAULT_LLM_CACHE_SUBDIR = "llm" # API Constants diff --git a/src/lightspeed_evaluation/core/models/llm.py b/src/lightspeed_evaluation/core/models/llm.py index c73fa633..84d4a9cc 100644 --- a/src/lightspeed_evaluation/core/models/llm.py +++ b/src/lightspeed_evaluation/core/models/llm.py @@ -171,6 +171,10 @@ class EmbeddingConfig(BaseModel): default=None, description="Embedding provider arguments, e.g. model_kwargs: device:cpu", ) + cache_dir: Optional[str] = Field( + default=None, + description="(Deprecated) Location of cached embedding queries", + ) cache_enabled: bool = Field( default=True, description="Is caching of embedding queries enabled?" 
) diff --git a/src/lightspeed_evaluation/core/models/system.py b/src/lightspeed_evaluation/core/models/system.py index 12afc25e..2a5d2cc6 100644 --- a/src/lightspeed_evaluation/core/models/system.py +++ b/src/lightspeed_evaluation/core/models/system.py @@ -15,7 +15,7 @@ ) from lightspeed_evaluation.core.constants import ( - DEFAULT_API_CACHE_SUBDIR, + DEFAULT_AGENT_CACHE_SUBDIR, DEFAULT_CACHE_BASE_DIR, DEFAULT_LLM_CACHE_SUBDIR, DEFAULT_LOG_FORMAT, @@ -353,11 +353,13 @@ def global_cache_setup(self) -> "SystemConfig": global_cache_base_dir = self.core.cache_base_dir # Build cache paths llm_cache_path = os.path.join(global_cache_base_dir, DEFAULT_LLM_CACHE_SUBDIR) - api_cache_path = os.path.join(global_cache_base_dir, DEFAULT_API_CACHE_SUBDIR) + api_cache_path = os.path.join(global_cache_base_dir, DEFAULT_AGENT_CACHE_SUBDIR) # If component-level caching is enabled, warn user has_api_cache_config = self.api.cache_enabled is False or self.api.cache_dir - has_embedding_cache_config = self.embedding.cache_enabled is False + has_embedding_cache_config = ( + self.embedding.cache_enabled is False or self.embedding.cache_dir + ) has_llm_pool_cache_config = self.llm_pool and ( self.llm_pool.defaults.cache_enabled is False or self.llm_pool.defaults.cache_dir diff --git a/tests/integration/system-config-query.yaml b/tests/integration/system-config-query.yaml index 3c107c97..56f60f6f 100644 --- a/tests/integration/system-config-query.yaml +++ b/tests/integration/system-config-query.yaml @@ -24,6 +24,7 @@ embedding: provider: "openai" model: "text-embedding-3-small" provider_kwargs: {} + cache_dir: ".caches/embedding_cache" cache_enabled: false # Lightspeed-stack API Configuration diff --git a/tests/integration/system-config-streaming.yaml b/tests/integration/system-config-streaming.yaml index b74c65cd..c7c9bde4 100644 --- a/tests/integration/system-config-streaming.yaml +++ b/tests/integration/system-config-streaming.yaml @@ -24,6 +24,7 @@ embedding: provider: "openai" model: 
"text-embedding-3-small" provider_kwargs: {} + cache_dir: ".caches/embedding_cache" cache_enabled: false # Lightspeed-stack API Configuration diff --git a/tests/unit/core/models/test_system.py b/tests/unit/core/models/test_system.py index 22fa93c8..6e5bf344 100644 --- a/tests/unit/core/models/test_system.py +++ b/tests/unit/core/models/test_system.py @@ -593,7 +593,7 @@ def test_global_default_cache_setup_legacy_llm_support(self) -> None: assert config.embedding.cache_enabled is True assert config.api.cache_enabled is True - assert config.api.cache_dir == ".caches/api" + assert config.api.cache_dir == ".caches/agent" def test_global_cache_setup_legacy_llm_support_override(self) -> None: """Test cache setup with custom legacy LLM config overrides.""" @@ -611,7 +611,7 @@ def test_global_cache_setup_legacy_llm_support_override(self) -> None: assert config.embedding.cache_enabled is True assert config.api.cache_enabled is True - assert config.api.cache_dir == ".caches/api" + assert config.api.cache_dir == ".caches/agent" def test_global_default_cache_setup_with_judge_panel(self) -> None: """Test default cache setup with judge panel configuration.""" @@ -643,7 +643,7 @@ def test_global_default_cache_setup_with_judge_panel(self) -> None: assert config.embedding.cache_enabled is True assert config.api.cache_enabled is True - assert config.api.cache_dir == ".caches/api" + assert config.api.cache_dir == ".caches/agent" def test_global_cache_setup_pool_without_panel(self) -> None: """Test that global_cache_setup populates llm_pool.defaults when judge_panel is absent.""" @@ -690,7 +690,7 @@ def test_global_cache_turned_off_with_judge_panel(self) -> None: assert config.embedding.cache_enabled is False assert config.api.cache_enabled is False - assert config.api.cache_dir == ".caches_test/api" + assert config.api.cache_dir == ".caches_test/agent" def test_global_cache_setup_with_judge_panel_override(self) -> None: """Test cache setup with custom judge panel config overrides."""