-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathconfig_code_kb.yml
More file actions
162 lines (143 loc) · 5.34 KB
/
config_code_kb.yml
File metadata and controls
162 lines (143 loc) · 5.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# Perspicacité config — CODE knowledge base (Mistral codestral-embed via OpenRouter)
#
# Use this for a KB of source code / technical docs. codestral-embed is a
# code-specialised embedding (1536-dim) that captures code semantics far better
# than general-text embedders.
#
# Provider: OpenRouter (OpenAI-compatible /embeddings endpoint). Reached through
# the existing LiteLLMEmbeddingProvider via the "openrouter/" model prefix and an
# OPENROUTER_API_KEY in .env — the same key used for LLM routing. Verified working
# 2026-05-28 (dim=1536).
#
# RERANKER DISABLED (reranker_enabled: false). Strong instruction-tuned embedders
# already deliver a near-perfect top-20; a general cross-encoder reorders that list
# and DEMOTES correct hits. Measured on Qwen3-8B (SciFact dev): ms-marco CE −1.4pp,
# bge-reranker-v2-m3 −1.0pp; the same effect was previously seen on OpenAI 3-large
# (bge-reranker −2.1pp). Rule: weak local embedder → rerank ON; strong external
# embedder → rerank OFF. No codestral-embed measurement is cited here, so this
# config applies that rule by analogy. See docs/embedding_reranker_policy.md.
#
# similarity_threshold: 0.0 — API embeddings score lower than MiniLM; the default
# 0.7 would filter out nearly everything.
#
# Costs: codestral-embed is billed per OpenRouter pricing. Embedding cost is
# incurred only when new documents are indexed; query embeddings are cheap.
#
# Launch:
# OPENROUTER_API_KEY=$OPENROUTER_API_KEY uv run perspicacite -c config_code_kb.yml serve
# Version string and identifying name of this config. The name encodes the KB
# type (code), the embedder (codestral) and the port (8003) configured below.
version: "2.0.0"
config_name: "code-kb-codestral-port8003"
# Server section: bind host, port, and reload flag.
# NOTE(review): "0.0.0.0" listens on every network interface — confirm this
# instance is meant to be reachable from other hosts.
# NOTE(review): the mcp section also uses port 8003 — presumably both are served
# by the same process; verify there is no port conflict.
server:
host: "0.0.0.0"
port: 8003
reload: false
# LLM routing: the default provider/model plus per-provider endpoint settings.
llm:
# Paid OpenRouter with Anthropic Claude Haiku 4.5 as the default.
# Switched from deepseek/deepseek-v4-flash because that model returned
# empty strings intermittently on small structured prompts (search
# optimizer, query variations, etc.), causing visible UX glitches:
# - optimizer "unparseable" on cache hits
# - advanced-mode "Searching with 1 query variations" (loop bailed
# on first empty response)
# Haiku 4.5 is ~500 ms, ~$0.80/$4 per M tokens, and adheres to JSON
# schemas reliably.
default_provider: "openrouter"
default_model: "anthropic/claude-haiku-4-5"
# Per-provider endpoints. Each entry sets base_url, timeout (presumably
# seconds — TODO confirm) and max_retries. No API keys are stored here; the
# header of this file says OPENROUTER_API_KEY comes from .env.
providers:
openrouter:
base_url: "https://openrouter.ai/api/v1"
timeout: 120
max_retries: 3
anthropic:
base_url: "https://api.anthropic.com"
timeout: 120
max_retries: 3
# NOTE(review): openai uses a 60 timeout while the other two use 120 — confirm
# the difference is intentional.
openai:
base_url: "https://api.openai.com/v1"
timeout: 60
max_retries: 3
# Context budget: a token cap and a chat-history turn count. The exact
# semantics (and which parent section this belongs to) are defined by the
# consumer — TODO confirm.
context:
max_tokens: 8000
chat_history_turns: 10
# Knowledge-base indexing and retrieval settings for the code KB.
knowledge_base:
# The "openrouter/" prefix routes this model through the LiteLLM embedding
# provider, as described in the header of this file.
embedding_model: "openrouter/mistralai/codestral-embed-2505" # 1536-dim, code-specialised
# chunk_size / chunk_overlap are presumably counted in tokens, given
# chunking_method "token" — TODO confirm against the chunker.
chunk_size: 1000
chunk_overlap: 200
chunking_method: "token"
default_top_k: 10
# 0.0 disables similarity filtering; per the header, the default 0.7 would
# filter out nearly everything with API embeddings.
similarity_threshold: 0.0 # API embeddings score lower; don't over-filter
use_two_pass: true
code_language_aware: true # language-aware chunking for source files
# Storage locations.
# NOTE(review): path starts with "~" and so relies on the consumer expanding the
# home directory, while chroma_path is relative ("./") and presumably resolves
# against the process working directory — confirm both are handled as intended.
database:
path: "~/.local/share/perspicacite/memory.db"
chroma_path: "./chroma_db"
# Per-mode retrieval settings: reranking and hybrid search are both switched off
# for this config; the comments below give the rationale.
rag_modes:
# Reranking DISABLED for this strong code embedder. A general cross-encoder
# demotes correct hits from an already-near-perfect embedding top-20.
# See docs/embedding_reranker_policy.md §3 for the measured evidence.
reranker_enabled: false
reranker_model: "cross-encoder/ms-marco-MiniLM-L-12-v2" # ignored while disabled
# Hybrid (vector + BM25) DISABLED for this strong embedder. BM25's value
# shrinks as the embedder strengthens: on Qwen3-8B (SciFact dev) a light
# BM25 blend was only +0.6pp, while the balanced/LLM-chosen weights that
# help MiniLM (~+3pp) HURT here (−1.4pp at 0.5, −3.7pp at 0.75 BM25). Advanced
# mode lets the LLM pick the weight, which can land in the harmful zone — so
# pure vector is the safe choice. See docs/embedding_reranker_policy.md §4.
# basic mode: a single iteration, vector-only retrieval.
basic:
max_iterations: 1
use_hybrid: false
# advanced mode: up to two iterations, vector-only retrieval.
# NOTE(review): use_wrrf stays true while use_hybrid is false — confirm this
# flag is independent of hybrid retrieval and still has the intended effect.
advanced:
max_iterations: 2
use_hybrid: false
use_wrrf: true
# scilex section: disabled for this config.
scilex:
enabled: false
# Required for the standalone PubMed-Entrez provider to register at
# startup. Without a real address build_aggregator logs
# 'build_aggregator_pubmed_skipped_no_email' and PubMed silently drops
# out of the aggregator. Mirrors config.yml.
# NOTE(review): this is an individual's address in a committed config —
# consider a shared project mailbox or an environment override.
pdf_download:
unpaywall_email: "tao.jiang@univ-cotedazur.fr"
# Mirror config.yml's provider allowlist so users can actually select
# google_scholar / dblp_sparql in the chat UI without the new
# selection_unavailable safeguard rejecting their picks. Stays a subset
# of providers Perspicacité knows how to build — adding e.g. "ads"
# requires ads_api_key in pdf_download or a separate ads block.
# NOTE(review): "pubmed" is not in this list even though the pdf_download
# comment describes the PubMed provider registering at startup — confirm
# whether PubMed needs an entry here to be selectable.
search:
enabled_providers:
- europepmc
- pubchem
- core
- inspire
- google_scholar
- dblp_sparql
# Headless Google Scholar via Playwright + OpenRouter Exa fallback.
# Requires `uv sync --extra browser` and `uv run playwright install
# chromium` once. See README ("Google Scholar (optional)").
google_scholar:
enabled: true
headless: true
# delay_seconds / max_results: a delay value and a result cap for Scholar
# requests (exact semantics defined by the provider — TODO confirm).
delay_seconds: 2.0
max_results: 20
openrouter_fallback_enabled: true
# NOTE(review): this is the same deepseek/deepseek-v4-flash model that the llm
# section's comment says was replaced for intermittently returning empty
# strings — confirm the fallback tolerates empty responses, or switch models.
openrouter_fallback_model: "deepseek/deepseek-v4-flash"
# Domains passed to the OpenRouter fallback (presumably a search allowlist —
# TODO confirm).
openrouter_fallback_domains:
- "arxiv.org"
- "biorxiv.org"
- "chemrxiv.org"
- "pubmed.ncbi.nlm.nih.gov"
- "europepmc.org"
- "semanticscholar.org"
- "crossref.org"
- "nature.com"
- "sciencedirect.com"
- "springer.com"
- "wiley.com"
# MCP endpoint: enabled, using the "streamable-http" transport.
# NOTE(review): host and port are identical to the server section
# ("0.0.0.0", 8003) — presumably MCP is mounted on the same HTTP server; verify
# it does not try to bind the port a second time.
mcp:
enabled: true
host: "0.0.0.0"
port: 8003
transport: "streamable-http"
# auth section: enabled.
# NOTE(review): no credentials or auth settings appear in this file —
# presumably they are supplied elsewhere (e.g. environment); confirm.
auth:
enabled: true
# Logging: INFO level, "json" output format.
logging:
level: "INFO"
format: "json"