Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions hindsight-api-slim/hindsight_api/api/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,12 @@ class RecallRequest(BaseModel):
description="Compound tag filter using boolean groups. Groups in the list are AND-ed. "
"Each group is a leaf {tags, match} or compound {and: [...]}, {or: [...]}, {not: ...}.",
)
retrieval_weights: dict[str, float] | None = Field(
default=None,
description="Per-strategy weights for Reciprocal Rank Fusion. Keys: 'semantic', 'bm25', "
"'graph', 'temporal'. Values are multipliers (1.0 = default, 2.0 = double influence, "
"0.0 = disabled). Omitted keys default to the bank/server configuration.",
)

@field_validator("query")
@classmethod
Expand Down Expand Up @@ -3215,6 +3221,7 @@ async def api_recall(
tags=request.tags,
tags_match=request.tags_match,
tag_groups=request.tag_groups,
retrieval_weights=request.retrieval_weights,
)

# Convert core MemoryFact objects to API RecallResult objects (excluding internal metrics)
Expand Down
28 changes: 28 additions & 0 deletions hindsight-api-slim/hindsight_api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,10 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]:
ENV_RECALL_BUDGET_ADAPTIVE_HIGH = "HINDSIGHT_API_RECALL_BUDGET_ADAPTIVE_HIGH"
ENV_RECALL_BUDGET_MIN = "HINDSIGHT_API_RECALL_BUDGET_MIN"
ENV_RECALL_BUDGET_MAX = "HINDSIGHT_API_RECALL_BUDGET_MAX"
ENV_RECALL_WEIGHT_SEMANTIC = "HINDSIGHT_API_RECALL_WEIGHT_SEMANTIC"
ENV_RECALL_WEIGHT_BM25 = "HINDSIGHT_API_RECALL_WEIGHT_BM25"
ENV_RECALL_WEIGHT_GRAPH = "HINDSIGHT_API_RECALL_WEIGHT_GRAPH"
ENV_RECALL_WEIGHT_TEMPORAL = "HINDSIGHT_API_RECALL_WEIGHT_TEMPORAL"

# Audit log settings
ENV_AUDIT_LOG_ENABLED = "HINDSIGHT_API_AUDIT_LOG_ENABLED"
Expand Down Expand Up @@ -677,6 +681,14 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]:
DEFAULT_RECALL_BUDGET_MIN = 20 # Floor for the adaptive function
DEFAULT_RECALL_BUDGET_MAX = 2000 # Ceiling for the adaptive function

# Recall retrieval weights (RRF fusion)
# Per-strategy multipliers for Reciprocal Rank Fusion.
# 1.0 = default (equal weight), 2.0 = double influence, 0.0 = disabled.
DEFAULT_RECALL_WEIGHT_SEMANTIC = 1.0
DEFAULT_RECALL_WEIGHT_BM25 = 1.0
DEFAULT_RECALL_WEIGHT_GRAPH = 1.0
DEFAULT_RECALL_WEIGHT_TEMPORAL = 1.0

# Disposition defaults (None = not set, fall back to bank DB value or 3)
DEFAULT_DISPOSITION_SKEPTICISM = None
DEFAULT_DISPOSITION_LITERALISM = None
Expand Down Expand Up @@ -1122,6 +1134,12 @@ class HindsightConfig:
recall_budget_min: int
recall_budget_max: int

# Recall retrieval weights (hierarchical - can be overridden per bank or per request)
recall_weight_semantic: float
recall_weight_bm25: float
recall_weight_graph: float
recall_weight_temporal: float

# Disposition settings (hierarchical - can be overridden per bank; None = fall back to DB)
disposition_skepticism: int | None
disposition_literalism: int | None
Expand Down Expand Up @@ -1261,6 +1279,11 @@ class HindsightConfig:
"recall_budget_adaptive_high",
"recall_budget_min",
"recall_budget_max",
# Recall retrieval weights
"recall_weight_semantic",
"recall_weight_bm25",
"recall_weight_graph",
"recall_weight_temporal",
# Disposition settings
"disposition_skepticism",
"disposition_literalism",
Expand Down Expand Up @@ -1841,6 +1864,11 @@ def from_env(cls) -> "HindsightConfig":
),
recall_budget_min=int(os.getenv(ENV_RECALL_BUDGET_MIN, str(DEFAULT_RECALL_BUDGET_MIN))),
recall_budget_max=int(os.getenv(ENV_RECALL_BUDGET_MAX, str(DEFAULT_RECALL_BUDGET_MAX))),
# Recall retrieval weights
recall_weight_semantic=float(os.getenv(ENV_RECALL_WEIGHT_SEMANTIC, str(DEFAULT_RECALL_WEIGHT_SEMANTIC))),
recall_weight_bm25=float(os.getenv(ENV_RECALL_WEIGHT_BM25, str(DEFAULT_RECALL_WEIGHT_BM25))),
recall_weight_graph=float(os.getenv(ENV_RECALL_WEIGHT_GRAPH, str(DEFAULT_RECALL_WEIGHT_GRAPH))),
recall_weight_temporal=float(os.getenv(ENV_RECALL_WEIGHT_TEMPORAL, str(DEFAULT_RECALL_WEIGHT_TEMPORAL))),
# Disposition settings (None = fall back to DB value)
disposition_skepticism=int(os.getenv(ENV_DISPOSITION_SKEPTICISM))
if os.getenv(ENV_DISPOSITION_SKEPTICISM)
Expand Down
1 change: 1 addition & 0 deletions hindsight-api-slim/hindsight_api/engine/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ async def recall_async(
include_chunks: bool = False,
max_chunk_tokens: int = 8192,
request_context: "RequestContext",
retrieval_weights: dict[str, float] | None = None,
) -> "RecallResult":
"""
Recall memories relevant to a query.
Expand Down
23 changes: 21 additions & 2 deletions hindsight-api-slim/hindsight_api/engine/memory_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -2655,6 +2655,7 @@ async def recall_async(
tag_groups: list[TagGroup] | None = None,
created_after: datetime | None = None,
created_before: datetime | None = None,
retrieval_weights: dict[str, float] | None = None,
_connection_budget: int | None = None,
_quiet: bool = False,
) -> RecallResultModel:
Expand Down Expand Up @@ -2754,6 +2755,18 @@ async def recall_async(
budget_config_dict = await self._config_resolver.get_bank_config(bank_id, request_context)
thinking_budget = _resolve_thinking_budget(budget_config_dict, budget, max_tokens)

# Build effective retrieval weights: config defaults + per-request overrides
effective_weights: dict[str, float] = {
"semantic": float(budget_config_dict.get("recall_weight_semantic", 1.0)),
"bm25": float(budget_config_dict.get("recall_weight_bm25", 1.0)),
"graph": float(budget_config_dict.get("recall_weight_graph", 1.0)),
"temporal": float(budget_config_dict.get("recall_weight_temporal", 1.0)),
}
if retrieval_weights:
effective_weights.update(retrieval_weights)
# Only pass weights if any differ from default (1.0)
rrf_weights = effective_weights if any(w != 1.0 for w in effective_weights.values()) else None

# Log recall start with tags if present (skip if quiet mode for internal operations)
if not _quiet:
tags_info = f", tags={tags} ({tags_match})" if tags else ""
Expand Down Expand Up @@ -2807,6 +2820,7 @@ async def recall_async(
include_source_facts=include_source_facts,
max_source_facts_tokens=max_source_facts_tokens,
max_source_facts_tokens_per_observation=max_source_facts_tokens_per_observation,
rrf_weights=rrf_weights,
)
break # Success - exit retry loop
except Exception as e:
Expand Down Expand Up @@ -2938,6 +2952,7 @@ async def _search_with_retries(
include_source_facts: bool = False,
max_source_facts_tokens: int = 4096,
max_source_facts_tokens_per_observation: int = -1,
rrf_weights: dict[str, float] | None = None,
) -> RecallResultModel:
"""
Search implementation with modular retrieval and reranking.
Expand Down Expand Up @@ -3265,10 +3280,14 @@ def to_tuple_format(results):
# Merge 3 or 4 result lists depending on temporal constraint
if temporal_results:
merged_candidates = reciprocal_rank_fusion(
[semantic_results, bm25_results, graph_results, temporal_results]
[semantic_results, bm25_results, graph_results, temporal_results],
weights=rrf_weights,
)
else:
merged_candidates = reciprocal_rank_fusion([semantic_results, bm25_results, graph_results])
merged_candidates = reciprocal_rank_fusion(
[semantic_results, bm25_results, graph_results],
weights=rrf_weights,
)

step_duration = time.time() - step_start
log_buffer.append(
Expand Down
27 changes: 22 additions & 5 deletions hindsight-api-slim/hindsight_api/engine/search/fusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,25 @@
from .types import MergedCandidate, RetrievalResult


def reciprocal_rank_fusion(result_lists: list[list[RetrievalResult]], k: int = 60) -> list[MergedCandidate]:
def reciprocal_rank_fusion(
result_lists: list[list[RetrievalResult]],
k: int = 60,
weights: dict[str, float] | None = None,
) -> list[MergedCandidate]:
"""
Merge multiple ranked result lists using Reciprocal Rank Fusion.

RRF formula: score(d) = sum_over_lists(1 / (k + rank(d)))
RRF formula: score(d) = sum_over_lists_i(w_i / (k + rank_i(d)))
where rank_i(d) is the 1-based rank of d in list i and w_i is that list's weight.

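When weights are provided, each retrieval strategy's contribution is
multiplied by its weight. A weight of 2.0 doubles that strategy's
influence; 0.0 removes its score contribution (in the scoring loop its
results and ranks are still recorded, they just add nothing to the score).
Strategies missing from the mapping use the default weight of 1.0 (unweighted).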

Args:
result_lists: List of result lists, each containing RetrievalResult objects
k: Constant for RRF formula (default: 60)
weights: Optional mapping of strategy name to weight, e.g.
{"semantic": 1.0, "bm25": 1.0, "graph": 2.0, "temporal": 1.0}

Returns:
Merged list of MergedCandidate objects, sorted by RRF score
Expand All @@ -25,8 +35,14 @@ def reciprocal_rank_fusion(result_lists: list[list[RetrievalResult]], k: int = 6
bm25_results = [RetrievalResult(...), RetrievalResult(...), ...]
graph_results = [RetrievalResult(...), RetrievalResult(...), ...]

# Unweighted (default)
merged = reciprocal_rank_fusion([semantic_results, bm25_results, graph_results])
# Returns: [MergedCandidate(...), MergedCandidate(...), ...]

# With graph retrieval weighted 2x
merged = reciprocal_rank_fusion(
[semantic_results, bm25_results, graph_results],
weights={"graph": 2.0},
)
"""
# Track scores from each list
rrf_scores = {}
Expand All @@ -37,6 +53,7 @@ def reciprocal_rank_fusion(result_lists: list[list[RetrievalResult]], k: int = 6

for source_idx, results in enumerate(result_lists):
source_name = source_names[source_idx] if source_idx < len(source_names) else f"source_{source_idx}"
weight = (weights or {}).get(source_name, 1.0)

for rank, retrieval in enumerate(results, start=1):
# Type check to catch tuple issues
Expand All @@ -56,12 +73,12 @@ def reciprocal_rank_fusion(result_lists: list[list[RetrievalResult]], k: int = 6
if doc_id not in all_retrievals:
all_retrievals[doc_id] = retrieval

# Calculate RRF score contribution
# Calculate weighted RRF score contribution
if doc_id not in rrf_scores:
rrf_scores[doc_id] = 0.0
source_ranks[doc_id] = {}

rrf_scores[doc_id] += 1.0 / (k + rank)
rrf_scores[doc_id] += weight / (k + rank)
source_ranks[doc_id][f"{source_name}_rank"] = rank

# Combine into final results with metadata
Expand Down
Loading
Loading