lightspeed-core · asamal4 · May 20, 2026 · May 20, 2026
diff --git a/config/system.yaml b/config/system.yaml
@@ -6,7 +6,7 @@ core:
   fail_on_invalid_data: true  # If False don't fail on invalid conversations (like missing context for some metrics)
   skip_on_failure: false      # If True, skip remaining turns when a turn evaluation fails (can be overridden per conversation)
   cache_enabled: true         # Global cache toggle, if True LLM as a judge, embeddings and API queries are cached
-  cache_base_dir: .caches     # Global base cache dir (queries cached separately under /llm (LLM as a judge + embeddings) and /api)
+  cache_base_dir: .caches     # Global base cache dir; queries are cached in separate subdirectories: /llm (LLM as a judge + embeddings) and /agent (agent API calls)
 
 # LLM as a judge configuration (Legacy)
 # Deprecated: top-level llm: (single judge) will be removed — use llm_pool + judge_panel only.

diff --git a/docs/configuration.md b/docs/configuration.md
@@ -8,13 +8,17 @@ The system configuration is driven by YAML file. The default config file is [con
 | max_threads    | `50` | Maximum number of threads, set to null for Python default. 50 is OK on a typical laptop. Check your Judge-LLM service for max requests per minute |
 | fail_on_invalid_data | `true` | If `false` don't fail on invalid conversations (like missing `context` field for some metrics) |
 | skip_on_failure | `false` | If `true`, skip remaining turns and conversation metrics when a turn evaluation fails (FAIL or ERROR). Can be overridden per conversation in the input data yaml file. |
+| cache_enabled | `true` | Global caching toggle for embeddings, agent API, and LLM judge queries. (_Component-level cache settings are deprecated._) |
+| cache_base_dir | `".caches"` | Base directory for all evaluation caches (embeddings, agent, LLM judge). Component-specific subdirectories are appended automatically (`/llm` for LLM-as-a-judge and embeddings, `/agent` for agent API calls). |
 
 ### Example
 ```yaml
 core:
   max_threads: 50
   fail_on_invalid_data: true
-  skip_on_failure: false  # Set to true to stop evaluation on first failure
+  skip_on_failure: false      # Set to true to stop evaluation on first failure
+  cache_enabled: true         # Global cache toggle (affects all components)
+  cache_base_dir: ".caches"   # Base cache directory
 ```
 
 ## LLM Pool
@@ -25,7 +29,7 @@ Define a centralized pool of LLM configurations for the Judge Panel feature.
 
 | Setting | Description |
 |---------|-------------|
-| `llm_pool.defaults.cache_dir` | Cache directory (default: `.caches/llm_cache`) |
+| `llm_pool.defaults.cache_dir` | Cache directory (default: `.caches/llm_cache`) (_deprecated - use `core.cache_base_dir`_) |
 | `llm_pool.defaults.timeout` | Request timeout in seconds (default: `300`) |
 | `llm_pool.defaults.num_retries` | Retry attempts (default: `3`) |
 | `llm_pool.defaults.parameters.temperature` | Sampling temperature |
@@ -113,8 +117,8 @@ This section configures LLM. It is used when `judge_panel` is not configured (ev
 | max_tokens |  `512` | Maximum tokens in response |
 | timeout | `300` | Request timeout in seconds |
 | num_retries | `3` | Maximum retry attempts |
-| cache_dir | `".caches/llm_cache"` | Directory with cached LLM responses |
-| cache_enabled | `true` | Is LLM cache enabled? |
+| cache_dir | `".caches/llm_cache"` | Directory with cached LLM responses (_deprecated - use `core.cache_base_dir`_) |
+| cache_enabled | `true` | Is LLM cache enabled? (_deprecated - use `core.cache_enabled`_) |
 
 Dynamic LLM parameters are only supported through `llm_pool` config. To use dynamic parameters, migrate to `llm_pool`.
 
@@ -128,8 +132,8 @@ Some Ragas metrics use embeddings to compute similarity between generated answer
 | provider | `"openai"` | Supported providers: `"openai"`, `"gemini"` or `"huggingface"`. `"huggingface"` downloads the model to the local machine and runs inference locally (requires optional dependencies).  |
 | model | `"text-embedding-3-small"` | Model name for the provider |
 | provider_kwargs | `{}` | Optional arguments for the model |
-| cache_dir | `".caches/embedding_cache"` | Directory with cached embeddings |
-| cache_enabled | `true` | Is embeddings cache enabled? |
+| cache_dir | `".caches/embedding_cache"` | Directory with cached embeddings (_deprecated - use `core.cache_base_dir`_) |
+| cache_enabled | `true` | Is embeddings cache enabled? (_deprecated - use `core.cache_enabled`_) |
 
 #### Remote vs Local Embedding Models
 
@@ -201,8 +205,8 @@ Note that it can be easily integrated with other APIs with a minimal change.
 | model | `"gpt-4o-mini"` | Model to use for API queries (optional) |
 | no_tools | `null` | Whether to bypass tools (optional) |
 | system_prompt | `null` | Custom system prompt (optional) |
-| cache_dir | `".caches/api_cache"` | Directory with cached API responses |
-| cache_enabled | `true` | Is API cache enabled? |
+| cache_dir | `".caches/api_cache"` | Directory with cached API responses (_deprecated - use `core.cache_base_dir`_) |
+| cache_enabled | `true` | Is API cache enabled? (_deprecated - use `core.cache_enabled`_) |
 | mcp_headers | `null` | MCP headers configuration for authentication (see below) |
 | num_retries | `3` | Maximum number of retry attempts for API calls on 429 errors |
 

diff --git a/src/lightspeed_evaluation/core/constants.py b/src/lightspeed_evaluation/core/constants.py
@@ -52,7 +52,7 @@
 
 # Cache configuration
 DEFAULT_CACHE_BASE_DIR = ".caches"
-DEFAULT_API_CACHE_SUBDIR = "api"
+DEFAULT_AGENT_CACHE_SUBDIR = "agent"
 DEFAULT_LLM_CACHE_SUBDIR = "llm"
 
 # API Constants

diff --git a/src/lightspeed_evaluation/core/models/llm.py b/src/lightspeed_evaluation/core/models/llm.py
@@ -171,6 +171,10 @@ class EmbeddingConfig(BaseModel):
         default=None,
         description="Embedding provider arguments, e.g. model_kwargs: device:cpu",
     )
+    cache_dir: Optional[str] = Field(
+        default=None,
+        description="(Deprecated) Location of cached embedding queries",
+    )
     cache_enabled: bool = Field(
         default=True, description="Is caching of embedding queries enabled?"
     )

diff --git a/src/lightspeed_evaluation/core/models/system.py b/src/lightspeed_evaluation/core/models/system.py
@@ -15,7 +15,7 @@
 )
 
 from lightspeed_evaluation.core.constants import (
-    DEFAULT_API_CACHE_SUBDIR,
+    DEFAULT_AGENT_CACHE_SUBDIR,
     DEFAULT_CACHE_BASE_DIR,
     DEFAULT_LLM_CACHE_SUBDIR,
     DEFAULT_LOG_FORMAT,
@@ -353,11 +353,13 @@ def global_cache_setup(self) -> "SystemConfig":
         global_cache_base_dir = self.core.cache_base_dir
         # Build cache paths
         llm_cache_path = os.path.join(global_cache_base_dir, DEFAULT_LLM_CACHE_SUBDIR)
-        api_cache_path = os.path.join(global_cache_base_dir, DEFAULT_API_CACHE_SUBDIR)
+        api_cache_path = os.path.join(global_cache_base_dir, DEFAULT_AGENT_CACHE_SUBDIR)
 
         # If component-level caching is enabled, warn user
         has_api_cache_config = self.api.cache_enabled is False or self.api.cache_dir
-        has_embedding_cache_config = self.embedding.cache_enabled is False
+        has_embedding_cache_config = (
+            self.embedding.cache_enabled is False or self.embedding.cache_dir
+        )
         has_llm_pool_cache_config = self.llm_pool and (
             self.llm_pool.defaults.cache_enabled is False
             or self.llm_pool.defaults.cache_dir

diff --git a/tests/integration/system-config-query.yaml b/tests/integration/system-config-query.yaml
@@ -24,6 +24,7 @@ embedding:
   provider: "openai"
   model: "text-embedding-3-small"
   provider_kwargs: {}
+  cache_dir: ".caches/embedding_cache"
   cache_enabled: false
 
 # Lightspeed-stack API Configuration

diff --git a/tests/integration/system-config-streaming.yaml b/tests/integration/system-config-streaming.yaml
@@ -24,6 +24,7 @@ embedding:
   provider: "openai"
   model: "text-embedding-3-small"
   provider_kwargs: {}
+  cache_dir: ".caches/embedding_cache"
   cache_enabled: false
 
 # Lightspeed-stack API Configuration

diff --git a/tests/unit/core/models/test_system.py b/tests/unit/core/models/test_system.py
@@ -593,7 +593,7 @@ def test_global_default_cache_setup_legacy_llm_support(self) -> None:
         assert config.embedding.cache_enabled is True
 
         assert config.api.cache_enabled is True
-        assert config.api.cache_dir == ".caches/api"
+        assert config.api.cache_dir == ".caches/agent"
 
     def test_global_cache_setup_legacy_llm_support_override(self) -> None:
         """Test cache setup with custom legacy LLM config overrides."""
@@ -611,7 +611,7 @@ def test_global_cache_setup_legacy_llm_support_override(self) -> None:
         assert config.embedding.cache_enabled is True
 
         assert config.api.cache_enabled is True
-        assert config.api.cache_dir == ".caches/api"
+        assert config.api.cache_dir == ".caches/agent"
 
     def test_global_default_cache_setup_with_judge_panel(self) -> None:
         """Test default cache setup with judge panel configuration."""
@@ -643,7 +643,7 @@ def test_global_default_cache_setup_with_judge_panel(self) -> None:
         assert config.embedding.cache_enabled is True
 
         assert config.api.cache_enabled is True
-        assert config.api.cache_dir == ".caches/api"
+        assert config.api.cache_dir == ".caches/agent"
 
     def test_global_cache_setup_pool_without_panel(self) -> None:
         """Test that global_cache_setup populates llm_pool.defaults when judge_panel is absent."""
@@ -690,7 +690,7 @@ def test_global_cache_turned_off_with_judge_panel(self) -> None:
         assert config.embedding.cache_enabled is False
 
         assert config.api.cache_enabled is False
-        assert config.api.cache_dir == ".caches_test/api"
+        assert config.api.cache_dir == ".caches_test/agent"
 
     def test_global_cache_setup_with_judge_panel_override(self) -> None:
         """Test cache setup with custom judge panel config overrides."""