lightspeed-core
diff --git a/config/system.yaml b/config/system.yaml
Lines changed: 2 additions & 10 deletions
diff --git a/src/lightspeed_evaluation/core/constants.py b/src/lightspeed_evaluation/core/constants.py
Lines changed: 4 additions & 3 deletions
diff --git a/src/lightspeed_evaluation/core/models/__init__.py b/src/lightspeed_evaluation/core/models/__init__.py
Lines changed: 5 additions & 3 deletions
diff --git a/src/lightspeed_evaluation/core/models/agents.py b/src/lightspeed_evaluation/core/models/agents.py
Lines changed: 2 additions & 4 deletions
@@ -5,6 +5,8 @@ core:
   max_threads: 50             # Maximum number of threads, set to null for Python default. 50 is OK for bigger datasets
   fail_on_invalid_data: true  # If False don't fail on invalid conversations (like missing context for some metrics)
   skip_on_failure: false      # If True, skip remaining turns when a turn evaluation fails (can be overridden per conversation)
+  cache_enabled: true         # Global cache toggle: if true, LLM-as-a-judge, embedding, and API queries are cached
+  cache_base_dir: .caches     # Global base cache dir; queries are cached separately under its llm/ (LLM as a judge + embeddings) and api/ subdirectories
 
 # LLM as a judge configuration (Legacy)
 # Deprecated: top-level llm: (single judge) will be removed — use llm_pool + judge_panel only.
@@ -17,15 +19,11 @@ core:
 #   max_tokens: 512                     # Maximum tokens in response
 #   timeout: 300                        # Request timeout in seconds
 #   num_retries: 3                      # Retry attempts
-#   cache_dir: ".caches/llm_cache"      # Directory with LLM cache
-#   cache_enabled: true                 # Is LLM cache enabled?
 
 # Pool of named models (judges reference these IDs)
 # Default values merge into each model; parameters supports extra provider keys; null removes an inherited parameter.
 llm_pool:
   defaults:
-    cache_enabled: true
-    cache_dir: ".caches/llm_cache"
     timeout: 300
     num_retries: 3
     parameters:
@@ -64,9 +62,6 @@ embedding:
   provider: "openai"
   model: "text-embedding-3-small"
   provider_kwargs: {}
-  cache_dir: ".caches/embedding_cache"
-  cache_enabled: true
-
 
 # Lightspeed-stack API Configuration
 # To get real time data. Currently it supports lightspeed-stack API.
@@ -90,9 +85,6 @@ api:
   # Example: extra_request_params:
   #            mode: troubleshooting
   extra_request_params: null
-
-  cache_dir: ".caches/api_cache"  # Directory with lightspeed-stack cache
-  cache_enabled: true                  # Is lightspeed-stack cache enabled?
 
   # MCP Server Authentication Configuration
   mcp_headers:
 
@@ -50,14 +50,17 @@
     SIMILARITY_JARO_WINKLER: DistanceMeasure.JARO_WINKLER,
 }
 
+# Cache configuration
+DEFAULT_CACHE_BASE_DIR = ".caches"
+DEFAULT_API_CACHE_SUBDIR = "api"
+DEFAULT_LLM_CACHE_SUBDIR = "llm"
 
 # API Constants
 DEFAULT_API_BASE = "http://localhost:8080"
 DEFAULT_API_VERSION = "v1"
 DEFAULT_API_TIMEOUT = 300
 DEFAULT_ENDPOINT_TYPE = "streaming"
 SUPPORTED_ENDPOINT_TYPES = ["streaming", "query", "infer"]
-DEFAULT_API_CACHE_DIR = ".caches/api_cache"
 
 DEFAULT_API_NUM_RETRIES = 3
 
@@ -75,11 +78,9 @@
 DEFAULT_LLM_TEMPERATURE = 0.0
 DEFAULT_LLM_MAX_TOKENS = 512
 DEFAULT_LLM_RETRIES = 3
-DEFAULT_LLM_CACHE_DIR = ".caches/llm_cache"
 
 DEFAULT_EMBEDDING_PROVIDER = "openai"
 DEFAULT_EMBEDDING_MODEL = "text-embedding-3-small"
-DEFAULT_EMBEDDING_CACHE_DIR = ".caches/embedding_cache"
 
 DEFAULT_OUTPUT_DIR = "./eval_output"
 DEFAULT_BASE_FILENAME = "evaluation"
 
@@ -25,15 +25,17 @@
 from lightspeed_evaluation.core.models.system import (
     APIConfig,
     CoreConfig,
+    LoggingConfig,
+    SystemConfig,
+    VisualizationConfig,
+)
+from lightspeed_evaluation.core.models.llm import (
     EmbeddingConfig,
     GEvalConfig,
     GEvalRubricConfig,
     JudgePanelConfig,
     LLMConfig,
     LLMPoolConfig,
-    LoggingConfig,
-    SystemConfig,
-    VisualizationConfig,
 )
 from lightspeed_evaluation.core.models.statistics import (
     NumericStats,
 
@@ -7,7 +7,6 @@
 
 from lightspeed_evaluation.core.constants import (
     DEFAULT_API_BASE,
-    DEFAULT_API_CACHE_DIR,
     DEFAULT_API_NUM_RETRIES,
     DEFAULT_API_TIMEOUT,
     DEFAULT_API_VERSION,
@@ -101,9 +100,8 @@ class HttpApiBaseFields(BaseModel):
         default=None, description="System prompt for API calls"
     )
     extra_request_params: Optional[dict[str, Any]] = Field(default=None)
-    cache_dir: str = Field(
-        default=DEFAULT_API_CACHE_DIR,
-        min_length=1,
+    cache_dir: Optional[str] = Field(
+        default=None,
         description="Location of cached API queries",
     )
     cache_enabled: bool = Field(