nadeem4
diff --git a/‎audit/remediation_plan.md‎
Lines changed: 3 additions & 2 deletions b/‎audit/remediation_plan.md‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎packages/cli/src/nl2sql_cli/commands/indexing.py‎
Lines changed: 55 additions & 44 deletions b/‎packages/cli/src/nl2sql_cli/commands/indexing.py‎
Lines changed: 55 additions & 44 deletions
diff --git a/‎packages/cli/src/nl2sql_cli/main.py‎
Lines changed: 1 addition & 1 deletion b/‎packages/cli/src/nl2sql_cli/main.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎packages/core/src/nl2sql/datasources/registry.py‎
Lines changed: 86 additions & 35 deletions b/‎packages/core/src/nl2sql/datasources/registry.py‎
Lines changed: 86 additions & 35 deletions
diff --git a/‎packages/core/src/nl2sql/pipeline/nodes/intent_validator/__init__.py‎
Lines changed: 3 additions & 0 deletions b/‎packages/core/src/nl2sql/pipeline/nodes/intent_validator/__init__.py‎
Lines changed: 3 additions & 0 deletions
@@ -22,10 +22,11 @@ This document serves as the master backlog for addressing findings from the Arch
   - **Fix**: Sanitize or hash non-user-facing errors in `AggregatorNode` before prompt construction. Only show generic error codes to the LLM.
   - **Status**: Fixed. Unit tests added in `tests/unit/test_node_aggregator.py`.
 
-- [ ] **BUG-004: Schema Drift (Stale Cache)** (High)
+- [x] **BUG-004: Schema Drift (Stale Cache)** (High)
   - **Component**: Governance / Registry
   - **Issue**: `DatasourceRegistry` caches adapters indefinitely at startup. If the DB schema changes, the Planner hallucinates invalid columns.
-  - **Fix**: Implement `SchemaWatcher` or TTL-based cache revocation in `DatasourceRegistry` to force schema refresh.
+  - **Fix**: Implemented idempotent `refresh_schema` and `refresh_examples` in `OrchestratorVectorStore`, along with dynamic `register_datasource` in `DatasourceRegistry`.
+  - **Status**: Fixed. Unit tests added in `tests/unit/test_schema_lifecycle.py`.
 
 - [ ] **BUG-005: Missing Distributed Tracing** (High)
   - **Component**: Observability / Logging
 
@@ -8,30 +8,28 @@
 
 @handle_cli_errors
 def run_indexing(
-    configs: Any, # List[Dict[str, Any]]
-    vector_store_path: str, 
-    vector_store: OrchestratorVectorStore, 
-    llm_registry: Any = None
+    registry: DatasourceRegistry,
+    vector_store_path: str,
+    vector_store: OrchestratorVectorStore,
+    llm_registry: Any = None,
 ) -> None:
     """Runs the indexing process for schemas and examples.
 
     This function orchestrates the full indexing workflow:
     1. Clears existing data from the vector store.
-    2. Indexes database schemas (tables, columns, foreign keys) for all configured adapters.
-    3. Indexes example questions from the sample questions file, optionally enriching them 
-       with synthetic variants using the Semantic Analysis Node.
-    4. Displays a comprehensive summary table of indexed content.
+    2. Indexes database schemas (tables, columns, foreign keys).
+    3. Indexes example questions, optionally enriching them.
+    4. Displays a comprehensive summary.
 
     Args:
-        configs (List[Any]): List of datasource configuration objects.
-        vector_store_path (str): Path to the vector store directory.
-        vector_store (OrchestratorVectorStore): The initialized vector store instance.
-        llm_registry (Any, optional): Registry of LLMs used for semantic enrichment of examples.
+        registry: The initialized DatasourceRegistry.
+        vector_store_path: Path to the vector store directory.
+        vector_store: The initialized vector store instance.
+        llm_registry: Registry of LLMs used for semantic enrichment.
     """
     presenter = ConsolePresenter()
     presenter.print_indexing_start(vector_store_path)
-    
-    registry = DatasourceRegistry(configs)
+
     adapters = registry.list_adapters()
     stats = []
 
@@ -42,73 +40,86 @@ def run_indexing(
     presenter.console.print("\n[bold]Indexing Schemas...[/bold]")
     for adapter in adapters:
         ds_id = adapter.datasource_id
-        
+
         with presenter.console.status(f"[cyan]Indexing schema: {ds_id}...[/cyan]"):
             try:
-                schema_stats = vector_store.index_schema(adapter, datasource_id=ds_id)
-                schema_stats['id'] = ds_id
+                # Use idempotent refresh
+                schema_stats = vector_store.refresh_schema(adapter, datasource_id=ds_id)
+                schema_stats["id"] = ds_id
                 stats.append(schema_stats)
-                
-                t_count = schema_stats['tables']
-                c_count = schema_stats['columns']
-                presenter.console.print(f"  [green][OK][/green] {ds_id} [dim]({t_count} Tables, {c_count} Columns)[/dim]")
-                
+
+                t_count = schema_stats["tables"]
+                c_count = schema_stats["columns"]
+                presenter.console.print(
+                    f"  [green][OK][/green] {ds_id} [dim]({t_count} Tables, {c_count} Columns)[/dim]"
+                )
+
             except Exception as e:
                 presenter.console.print(f"  [red][FAIL][/red] {ds_id} [red]Failed: {e}[/red]")
-                stats.append({'id': ds_id, 'tables': 0, 'columns': 0, 'examples': 0, 'error': str(e)})
+                stats.append(
+                    {"id": ds_id, "tables": 0, "columns": 0, "examples": 0, "error": str(e)}
+                )
 
     from nl2sql.common.settings import settings
     import yaml
     import pathlib
-    
+
     presenter.console.print("\n[bold]Indexing Examples...[/bold]")
-    
+
     total_examples = 0
     path = pathlib.Path(settings.sample_questions_path)
-    
+
     if path.exists():
         try:
             examples_data = yaml.safe_load(path.read_text()) or {}
-            
+
             def get_stat_entry(ds_id):
                 for s in stats:
-                    if s['id'] == ds_id: return s
-                new_s = {'id': ds_id, 'tables': 0, 'columns': 0, 'examples': 0}
+                    if s["id"] == ds_id:
+                        return s
+                new_s = {"id": ds_id, "tables": 0, "columns": 0, "examples": 0}
                 stats.append(new_s)
                 return new_s
 
             enricher = None
             if llm_registry:
                 try:
                     from nl2sql.pipeline.nodes.semantic.node import SemanticAnalysisNode
+
                     enricher = SemanticAnalysisNode(llm_registry.semantic_llm())
                 except Exception as e:
                     presenter.print_warning(f"Could not load SemanticNode: {e}")
             else:
-                presenter.console.print("  [yellow]![/yellow] [dim]Skipping enrichment (No LLM config)[/dim]")
+                presenter.console.print(
+                    "  [yellow]![/yellow] [dim]Skipping enrichment (No LLM config)[/dim]"
+                )
 
             for ds_id, questions in examples_data.items():
                 with presenter.console.status(f"[cyan]Indexing examples for {ds_id}...[/cyan]"):
                     try:
-                        docs = vector_store.prepare_examples_for_datasource(ds_id, questions, enricher)
-                        vector_store.add_documents(docs)
-                        
-                        count = len(docs)
+                        # Use idempotent refresh
+                        count = vector_store.refresh_examples(ds_id, questions, enricher)
                         total_examples += count
-                        
+
                         # Update stats
                         entry = get_stat_entry(ds_id)
-                        entry['examples'] = count
-                        
-                        presenter.console.print(f"  [green][OK][/green] {ds_id} [dim]({count} examples)[/dim]")
-                        
+                        entry["examples"] = count
+
+                        presenter.console.print(
+                            f"  [green][OK][/green] {ds_id} [dim]({count} examples)[/dim]"
+                        )
+
                     except Exception as e:
-                        presenter.console.print(f"  [red][FAIL][/red] {ds_id} [red]Failed: {e}[/red]")
-                        
+                        presenter.console.print(
+                            f"  [red][FAIL][/red] {ds_id} [red]Failed: {e}[/red]"
+                        )
+
         except Exception as e:
-             presenter.console.print(f"  [red][FAIL][/red] Failed to load {path}: {e}")
+            presenter.console.print(f"  [red][FAIL][/red] Failed to load {path}: {e}")
     else:
-        presenter.console.print(f"  [yellow]![/yellow] [dim]No examples file found at {path}[/dim]")
-            
+        presenter.console.print(
+            f"  [yellow]![/yellow] [dim]No examples file found at {path}[/dim]"
+        )
+
     presenter.print_indexing_summary(stats)
     presenter.print_indexing_complete()
@@ -128,7 +128,7 @@ def index(
         vector_store_path=vector_store
     )
 
-    run_indexing(ctx.registry.get_all(), vector_store, ctx.vector_store, ctx.llm_registry)
+    run_indexing(ctx.registry, vector_store, ctx.vector_store, ctx.llm_registry)
 
 @app.command()
 def doctor():
 
@@ -1,4 +1,4 @@
-from typing import Dict, Type, List, Any
+from typing import Dict, Type, List, Any, Union
 import importlib
 from nl2sql_adapter_sdk import DatasourceAdapter
 from nl2sql.datasources.discovery import discover_adapters
@@ -43,52 +43,88 @@ def __init__(self, configs: List[Dict[str, Any]]):
         """Initializes the registry by eagerly creating adapters for all configs.
 
         Args:
-            configs (List[Any]): List of datasource configuration objects (Dict or DatasourceConfig).
+            configs: List of datasource configuration objects (Dict or DatasourceConfig).
         """
         self._adapters: Dict[str, DatasourceAdapter] = {}
-        available_adapters = discover_adapters()
+        self._available_adapters = discover_adapters()
 
         for config in configs:
             try:
-                # Normalize Pydantic Model to Dict
-                if hasattr(config, "model_dump"):
-                    config = config.model_dump()
-
-                ds_id = config.get("id")
-                if not ds_id:
-                    raise ValueError("Datasource ID is required. Please check your configuration.")
-                
-                connection = config.get("connection", {})
-                conn_type = connection.get("type", "").lower()
-                resolved_connection = self.resolved_connection(connection)
-
-                if conn_type in available_adapters:
-                    AdapterCls = available_adapters[conn_type]
-                    
-                    adapter = AdapterCls(
-                        datasource_id=ds_id,
-                        datasource_engine_type=conn_type,
-                        connection_args=resolved_connection,
-                        statement_timeout_ms=config.get("statement_timeout_ms"),
-                        row_limit=config.get("row_limit"),
-                        max_bytes=config.get("max_bytes")
-                    )
-                    self._adapters[ds_id] = adapter
-                else:
-                    raise ValueError(f"No adapter found for engine type: '{conn_type}' in datasource '{ds_id}'")
-            
+                self.register_datasource(config)
             except Exception as e:
+                # Logging the failure would be better here, but we raise to stop startup on bad config
                 raise ValueError(f"Failed to initialize adapter for '{config.get('id', 'unknown')}': {e}") from e
 
+    def register_datasource(self, config: Union[Dict[str, Any], Any]) -> DatasourceAdapter:
+        """Registers a new datasource dynamically.
+
+        Args:
+            config: The datasource configuration dictionary or object.
+
+        Returns:
+            DatasourceAdapter: The created and registered adapter.
+
+        Raises:
+            ValueError: If configuration is invalid or adapter type is unknown.
+        """
+        # Normalize Pydantic Model to Dict
+        if hasattr(config, "model_dump"):
+            config = config.model_dump()
+
+        ds_id = config.get("id")
+        if not ds_id:
+            raise ValueError("Datasource ID is required. Please check your configuration.")
+
+        connection = config.get("connection", {})
+        conn_type = connection.get("type", "").lower()
+        resolved_connection = self.resolved_connection(connection)
+
+        if conn_type in self._available_adapters:
+            adapter_cls = self._available_adapters[conn_type]
+
+            adapter = adapter_cls(
+                datasource_id=ds_id,
+                datasource_engine_type=conn_type,
+                connection_args=resolved_connection,
+                statement_timeout_ms=config.get("statement_timeout_ms"),
+                row_limit=config.get("row_limit"),
+                max_bytes=config.get("max_bytes"),
+            )
+            self._adapters[ds_id] = adapter
+            return adapter
+        else:
+            raise ValueError(
+                f"No adapter found for engine type: '{conn_type}' in datasource '{ds_id}'"
+            )
+
+    def refresh_schema(self, datasource_id: str, vector_store: Any) -> Dict[str, int]:
+        """Refreshes the schema for a specific datasource.
+        
+        This triggers a fresh introspection of the database schema via the adapter
+        and updates the vector store index.
+
+        Args:
+            datasource_id: The ID of the datasource to refresh.
+            vector_store: The OrchestratorVectorStore instance.
+
+        Returns:
+            Dict[str, int]: Statistics of the refreshed components.
+
+        Raises:
+            ValueError: If the datasource ID is unknown.
+        """
+        adapter = self.get_adapter(datasource_id)
+        return vector_store.refresh_schema(adapter, datasource_id)
+
     def get_adapter(self, datasource_id: str) -> DatasourceAdapter:
         """Retrieves the DataSourceAdapter for a datasource.
 
         Args:
-            datasource_id (str): The ID of the datasource.
+            datasource_id: The ID of the datasource.
 
         Returns:
             DatasourceAdapter: The active adapter instance.
-        
+
         Raises:
             ValueError: If the datasource ID is unknown.
         """
@@ -97,13 +133,28 @@ def get_adapter(self, datasource_id: str) -> DatasourceAdapter:
         return self._adapters[datasource_id]
 
     def get_dialect(self, datasource_id: str) -> str:
-        """Returns a normalized dialect string from the adapter."""
+        """Returns a normalized dialect string from the adapter.
+
+        Args:
+            datasource_id: The ID of the datasource.
+
+        Returns:
+            str: The dialect string (e.g., 'postgres').
+        """
         return self.get_adapter(datasource_id).get_dialect()
 
     def list_adapters(self) -> List[DatasourceAdapter]:
-        """Returns a list of all registered adapters."""
+        """Returns a list of all registered adapters.
+
+        Returns:
+            List[DatasourceAdapter]: All active adapters.
+        """
         return list(self._adapters.values())
 
     def list_ids(self) -> List[str]:
-        """Returns a list of all registered datasource IDs."""
+        """Returns a list of all registered datasource IDs.
+
+        Returns:
+            List[str]: All registered IDs.
+        """
         return list(self._adapters.keys())
@@ -0,0 +1,3 @@
+from .node import IntentValidatorNode
+
+__all__ = ["IntentValidatorNode"]
Original file line number	Diff line number	Diff line change
`@@ -128,7 +128,7 @@ def index(`
`128`	`128`	`vector_store_path=vector_store`
`129`	`129`	`)`
`130`	`130`
`131`		`- run_indexing(ctx.registry.get_all(), vector_store, ctx.vector_store, ctx.llm_registry)`
	`131`	`+ run_indexing(ctx.registry, vector_store, ctx.vector_store, ctx.llm_registry)`
`132`	`132`
`133`	`133`	`@app.command()`
`134`	`134`	`def doctor():`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+from .node import IntentValidatorNode`
	`2`	`+`
	`3`	`+__all__ = ["IntentValidatorNode"]`