vectorize-io · dcbouius · May 14, 2026 · May 14, 2026 · May 14, 2026
diff --git a/hindsight-api-slim/hindsight_api/api/http.py b/hindsight-api-slim/hindsight_api/api/http.py
@@ -171,6 +171,12 @@ class RecallRequest(BaseModel):
         description="Compound tag filter using boolean groups. Groups in the list are AND-ed. "
         "Each group is a leaf {tags, match} or compound {and: [...]}, {or: [...]}, {not: ...}.",
     )
+    retrieval_weights: dict[str, float] | None = Field(
+        default=None,
+        description="Per-strategy weights for Reciprocal Rank Fusion. Keys: 'semantic', 'bm25', "
+        "'graph', 'temporal'. Values are multipliers (1.0 = default, 2.0 = double influence, "
+        "0.0 = disabled). Omitted keys default to the bank/server configuration.",
+    )
 
     @field_validator("query")
     @classmethod
@@ -3215,6 +3221,7 @@ async def api_recall(
                     tags=request.tags,
                     tags_match=request.tags_match,
                     tag_groups=request.tag_groups,
+                    retrieval_weights=request.retrieval_weights,
                 )
 
             # Convert core MemoryFact objects to API RecallResult objects (excluding internal metrics)

diff --git a/hindsight-api-slim/hindsight_api/config.py b/hindsight-api-slim/hindsight_api/config.py
@@ -445,6 +445,10 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]:
 ENV_RECALL_BUDGET_ADAPTIVE_HIGH = "HINDSIGHT_API_RECALL_BUDGET_ADAPTIVE_HIGH"
 ENV_RECALL_BUDGET_MIN = "HINDSIGHT_API_RECALL_BUDGET_MIN"
 ENV_RECALL_BUDGET_MAX = "HINDSIGHT_API_RECALL_BUDGET_MAX"
+ENV_RECALL_WEIGHT_SEMANTIC = "HINDSIGHT_API_RECALL_WEIGHT_SEMANTIC"
+ENV_RECALL_WEIGHT_BM25 = "HINDSIGHT_API_RECALL_WEIGHT_BM25"
+ENV_RECALL_WEIGHT_GRAPH = "HINDSIGHT_API_RECALL_WEIGHT_GRAPH"
+ENV_RECALL_WEIGHT_TEMPORAL = "HINDSIGHT_API_RECALL_WEIGHT_TEMPORAL"
 
 # Audit log settings
 ENV_AUDIT_LOG_ENABLED = "HINDSIGHT_API_AUDIT_LOG_ENABLED"
@@ -677,6 +681,14 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]:
 DEFAULT_RECALL_BUDGET_MIN = 20  # Floor for the adaptive function
 DEFAULT_RECALL_BUDGET_MAX = 2000  # Ceiling for the adaptive function
 
+# Recall retrieval weights (RRF fusion)
+# Per-strategy multipliers for Reciprocal Rank Fusion.
+# 1.0 = default (equal weight), 2.0 = double influence, 0.0 = disabled.
+DEFAULT_RECALL_WEIGHT_SEMANTIC = 1.0
+DEFAULT_RECALL_WEIGHT_BM25 = 1.0
+DEFAULT_RECALL_WEIGHT_GRAPH = 1.0
+DEFAULT_RECALL_WEIGHT_TEMPORAL = 1.0
+
 # Disposition defaults (None = not set, fall back to bank DB value or 3)
 DEFAULT_DISPOSITION_SKEPTICISM = None
 DEFAULT_DISPOSITION_LITERALISM = None
@@ -1122,6 +1134,12 @@ class HindsightConfig:
     recall_budget_min: int
     recall_budget_max: int
 
+    # Recall retrieval weights (hierarchical - can be overridden per bank or per request)
+    recall_weight_semantic: float
+    recall_weight_bm25: float
+    recall_weight_graph: float
+    recall_weight_temporal: float
+
     # Disposition settings (hierarchical - can be overridden per bank; None = fall back to DB)
     disposition_skepticism: int | None
     disposition_literalism: int | None
@@ -1261,6 +1279,11 @@ class HindsightConfig:
         "recall_budget_adaptive_high",
         "recall_budget_min",
         "recall_budget_max",
+        # Recall retrieval weights
+        "recall_weight_semantic",
+        "recall_weight_bm25",
+        "recall_weight_graph",
+        "recall_weight_temporal",
         # Disposition settings
         "disposition_skepticism",
         "disposition_literalism",
@@ -1841,6 +1864,11 @@ def from_env(cls) -> "HindsightConfig":
             ),
             recall_budget_min=int(os.getenv(ENV_RECALL_BUDGET_MIN, str(DEFAULT_RECALL_BUDGET_MIN))),
             recall_budget_max=int(os.getenv(ENV_RECALL_BUDGET_MAX, str(DEFAULT_RECALL_BUDGET_MAX))),
+            # Recall retrieval weights
+            recall_weight_semantic=float(os.getenv(ENV_RECALL_WEIGHT_SEMANTIC, str(DEFAULT_RECALL_WEIGHT_SEMANTIC))),
+            recall_weight_bm25=float(os.getenv(ENV_RECALL_WEIGHT_BM25, str(DEFAULT_RECALL_WEIGHT_BM25))),
+            recall_weight_graph=float(os.getenv(ENV_RECALL_WEIGHT_GRAPH, str(DEFAULT_RECALL_WEIGHT_GRAPH))),
+            recall_weight_temporal=float(os.getenv(ENV_RECALL_WEIGHT_TEMPORAL, str(DEFAULT_RECALL_WEIGHT_TEMPORAL))),
             # Disposition settings (None = fall back to DB value)
             disposition_skepticism=int(os.getenv(ENV_DISPOSITION_SKEPTICISM))
             if os.getenv(ENV_DISPOSITION_SKEPTICISM)

diff --git a/hindsight-api-slim/hindsight_api/engine/interface.py b/hindsight-api-slim/hindsight_api/engine/interface.py
@@ -82,6 +82,7 @@ async def recall_async(
         include_chunks: bool = False,
         max_chunk_tokens: int = 8192,
         request_context: "RequestContext",
+        retrieval_weights: dict[str, float] | None = None,
     ) -> "RecallResult":
         """
         Recall memories relevant to a query.

diff --git a/hindsight-api-slim/hindsight_api/engine/memory_engine.py b/hindsight-api-slim/hindsight_api/engine/memory_engine.py
@@ -2655,6 +2655,7 @@ async def recall_async(
         tag_groups: list[TagGroup] | None = None,
         created_after: datetime | None = None,
         created_before: datetime | None = None,
+        retrieval_weights: dict[str, float] | None = None,
         _connection_budget: int | None = None,
         _quiet: bool = False,
     ) -> RecallResultModel:
@@ -2754,6 +2755,18 @@ async def recall_async(
         budget_config_dict = await self._config_resolver.get_bank_config(bank_id, request_context)
         thinking_budget = _resolve_thinking_budget(budget_config_dict, budget, max_tokens)
 
+        # Build effective retrieval weights: config defaults + per-request overrides
+        effective_weights: dict[str, float] = {
+            "semantic": float(budget_config_dict.get("recall_weight_semantic", 1.0)),
+            "bm25": float(budget_config_dict.get("recall_weight_bm25", 1.0)),
+            "graph": float(budget_config_dict.get("recall_weight_graph", 1.0)),
+            "temporal": float(budget_config_dict.get("recall_weight_temporal", 1.0)),
+        }
+        if retrieval_weights:
+            effective_weights.update(retrieval_weights)
+        # Pass None when every weight is exactly 1.0 so fusion falls back to its unweighted default
+        rrf_weights = effective_weights if any(w != 1.0 for w in effective_weights.values()) else None
+
         # Log recall start with tags if present (skip if quiet mode for internal operations)
         if not _quiet:
             tags_info = f", tags={tags} ({tags_match})" if tags else ""
@@ -2807,6 +2820,7 @@ async def recall_async(
                             include_source_facts=include_source_facts,
                             max_source_facts_tokens=max_source_facts_tokens,
                             max_source_facts_tokens_per_observation=max_source_facts_tokens_per_observation,
+                            rrf_weights=rrf_weights,
                         )
                         break  # Success - exit retry loop
                     except Exception as e:
@@ -2938,6 +2952,7 @@ async def _search_with_retries(
         include_source_facts: bool = False,
         max_source_facts_tokens: int = 4096,
         max_source_facts_tokens_per_observation: int = -1,
+        rrf_weights: dict[str, float] | None = None,
     ) -> RecallResultModel:
         """
         Search implementation with modular retrieval and reranking.
@@ -3265,10 +3280,14 @@ def to_tuple_format(results):
                 # Merge 3 or 4 result lists depending on temporal constraint
                 if temporal_results:
                     merged_candidates = reciprocal_rank_fusion(
-                        [semantic_results, bm25_results, graph_results, temporal_results]
+                        [semantic_results, bm25_results, graph_results, temporal_results],
+                        weights=rrf_weights,
                     )
                 else:
-                    merged_candidates = reciprocal_rank_fusion([semantic_results, bm25_results, graph_results])
+                    merged_candidates = reciprocal_rank_fusion(
+                        [semantic_results, bm25_results, graph_results],
+                        weights=rrf_weights,
+                    )
 
                 step_duration = time.time() - step_start
                 log_buffer.append(

diff --git a/hindsight-api-slim/hindsight_api/engine/search/fusion.py b/hindsight-api-slim/hindsight_api/engine/search/fusion.py
@@ -7,15 +7,25 @@
 from .types import MergedCandidate, RetrievalResult
 
 
-def reciprocal_rank_fusion(result_lists: list[list[RetrievalResult]], k: int = 60) -> list[MergedCandidate]:
+def reciprocal_rank_fusion(
+    result_lists: list[list[RetrievalResult]],
+    k: int = 60,
+    weights: dict[str, float] | None = None,
+) -> list[MergedCandidate]:
     """
     Merge multiple ranked result lists using Reciprocal Rank Fusion.
 
-    RRF formula: score(d) = sum_over_lists(1 / (k + rank(d)))
+    RRF formula: score(d) = sum_over_lists(w_i / (k + rank(d)))
+
+    When weights are provided, each strategy's contribution is multiplied by
+    its weight: 2.0 doubles its influence; 0.0 zeroes its contribution (its
+    results are still recorded as candidates). Missing keys default to 1.0.
 
     Args:
         result_lists: List of result lists, each containing RetrievalResult objects
         k: Constant for RRF formula (default: 60)
+        weights: Optional mapping of strategy name to weight, e.g.
+                 {"semantic": 1.0, "bm25": 1.0, "graph": 2.0, "temporal": 1.0}
 
     Returns:
         Merged list of MergedCandidate objects, sorted by RRF score
@@ -25,8 +35,14 @@ def reciprocal_rank_fusion(result_lists: list[list[RetrievalResult]], k: int = 6
         bm25_results = [RetrievalResult(...), RetrievalResult(...), ...]
         graph_results = [RetrievalResult(...), RetrievalResult(...), ...]
 
+        # Unweighted (default)
         merged = reciprocal_rank_fusion([semantic_results, bm25_results, graph_results])
-        # Returns: [MergedCandidate(...), MergedCandidate(...), ...]
+
+        # With graph retrieval weighted 2x
+        merged = reciprocal_rank_fusion(
+            [semantic_results, bm25_results, graph_results],
+            weights={"graph": 2.0},
+        )
     """
     # Track scores from each list
     rrf_scores = {}
@@ -37,6 +53,7 @@ def reciprocal_rank_fusion(result_lists: list[list[RetrievalResult]], k: int = 6
 
     for source_idx, results in enumerate(result_lists):
         source_name = source_names[source_idx] if source_idx < len(source_names) else f"source_{source_idx}"
+        weight = (weights or {}).get(source_name, 1.0)
 
         for rank, retrieval in enumerate(results, start=1):
             # Type check to catch tuple issues
@@ -56,12 +73,12 @@ def reciprocal_rank_fusion(result_lists: list[list[RetrievalResult]], k: int = 6
             if doc_id not in all_retrievals:
                 all_retrievals[doc_id] = retrieval
 
-            # Calculate RRF score contribution
+            # Calculate weighted RRF score contribution
             if doc_id not in rrf_scores:
                 rrf_scores[doc_id] = 0.0
                 source_ranks[doc_id] = {}
 
-            rrf_scores[doc_id] += 1.0 / (k + rank)
+            rrf_scores[doc_id] += weight / (k + rank)
             source_ranks[doc_id][f"{source_name}_rank"] = rank
 
     # Combine into final results with metadata