Skip to content

Commit 2676131

Browse files
Merge pull request #7 from VectifyAI/v0.3
update readme and config
2 parents 506c46c + 322a220 commit 2676131

4 files changed

Lines changed: 42 additions & 27 deletions

File tree

README.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -149,19 +149,19 @@ Run setup: `fs_query_order=prefix`, `beam_size=3`, `max_turns=10`, `5` filesyste
149149

150150
| Retriever | Avg Time (s) | Avg LLM Calls | Hit@1 | Hit@10 | Total Cost (USD) |
151151
|---|---:|---:|---:|---:|---:|
152-
| **Block** | 9.27 | 2.6 | 1.00 | 1.00 | 0.1416 |
153-
| **Vertical** | 22.85 | 6.8 | 0.40 | 1.00 | 0.1682 |
154-
| **Beam** | 18.37 | 5.0 | 0.60 | 1.00 | 0.1331 |
152+
| **Block** | 8.44 | 2.4 | 1.00 | 1.00 | 0.2166 |
153+
| **Vertical** | 28.18 | 6.8 | 0.40 | 1.00 | 0.2900 |
154+
| **Beam** | 18.36 | 4.8 | 0.60 | 1.00 | 0.2091 |
155155

156156
### Claude Sonnet 4.6
157157

158158
| Retriever | Avg Time (s) | Avg LLM Calls | Hit@1 | Hit@10 | Total Cost (USD) |
159159
|---|---:|---:|---:|---:|---:|
160-
| **Block** | 7.95 | 2.8 | 1.00 | 1.00 | 0.1670 |
161-
| **Vertical** | 17.85 | 5.8 | 0.40 | 0.80 | 0.1438 |
162-
| **Beam** | 17.41 | 4.8 | 0.60 | 1.00 | 0.1338 |
160+
| **Block** | 8.42 | 3.4 | 1.00 | 1.00 | 0.0643 |
161+
| **Vertical** | 20.78 | 7.0 | 0.40 | 0.80 | 0.1712 |
162+
| **Beam** | 17.84 | 4.8 | 0.40 | 1.00 | 0.1335 |
163163

164-
`Block` is the best default: perfect Hit@1 across both models. `Beam` and `Vertical` are sensitive to model version — `Block` is the most robust choice.
164+
`Block` is the best default: perfect Hit@1 across both models, lowest cost on Sonnet 4.6 (prompt caching cuts cost by ~60%), and lowest latency. `Beam` and `Vertical` are sensitive to model version — `Block` is the most robust choice.
165165

166166
These numbers are benchmark snapshots, not hard guarantees; exact cost and latency will vary with model choice, provider pricing, prompt-cache behavior, and corpus shape.
167167

bench/benchmark_retrievers.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
sys.path.insert(0, str(Path(__file__).parent.parent))
3131

3232
from contextdb import TreeDB
33-
from contextdb.config import Config
33+
from contextdb.config import Config, get_llm_config
3434
from contextdb.metrics import LLMWithStats, StatisticsRecorder
3535
from contextdb.retriever.algorithm.base_retriever import BaseRetriever
3636

@@ -110,10 +110,16 @@ class BenchmarkResult:
110110
retriever_names: list[str] = field(default_factory=list)
111111
queries: list[QueryResult] = field(default_factory=list)
112112

113-
def summary(self) -> dict:
113+
def summary(self, pricing: dict = None) -> dict:
114114
def total(items, attr):
115115
return sum(getattr(i, attr) for i in items) if items else 0
116116

117+
p = pricing or {}
118+
pi = p.get("price_input", 3)
119+
po = p.get("price_output", 15)
120+
pcw = p.get("price_cache_write", 3.75)
121+
pcr = p.get("price_cache_read", 0.30)
122+
117123
summary = {"queries_run": len(self.queries)}
118124

119125
for name in self.retriever_names:
@@ -128,7 +134,9 @@ def total(items, attr):
128134
total_cache_read = total(valid, "cache_read_tokens")
129135
total_cache_write = total(valid, "cache_creation_tokens")
130136

131-
cost = (total_input * 3 + total_output * 15 + total_cache_write * 3.75 + total_cache_read * 0.30) / 1_000_000
137+
# litellm: input_tokens includes cache_read + cache_creation + uncached
138+
uncached_input = total_input - total_cache_read - total_cache_write
139+
cost = (uncached_input * pi + total_output * po + total_cache_write * pcw + total_cache_read * pcr) / 1_000_000
132140

133141
n = len(valid) if valid else 1
134142
s = {
@@ -471,7 +479,8 @@ def make_tree(db):
471479

472480

473481
def print_summary(result: BenchmarkResult):
474-
summary = result.summary()
482+
pricing = get_llm_config(Config.LLM_PROVIDER, Config.LLM_MODEL)
483+
summary = result.summary(pricing)
475484

476485
print("\n" + "=" * 70)
477486
title = "FILESYSTEM BENCHMARK SUMMARY" if result.mode == "fs" else "BENCHMARK SUMMARY"
@@ -626,7 +635,7 @@ def main():
626635
{"query": q.query, "results": {n: asdict(m) for n, m in q.results.items()}}
627636
for q in result.queries
628637
],
629-
"summary": result.summary(),
638+
"summary": result.summary(get_llm_config(Config.LLM_PROVIDER, Config.LLM_MODEL)),
630639
}, indent=2))
631640
else:
632641
print_summary(result)
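contextdb/config/llm/anthropic.yaml (file name missing from the extracted page; inferred from the hunk's "Anthropic models configuration" header and the sibling openai.yaml path — confirm against the commit)

Lines changed: 9 additions & 0 deletions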
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,22 @@
11
# Anthropic models configuration
2+
# Pricing in USD per million tokens
23

34
claude-opus-4-6:
45
context_limit: 200000
56
max_concurrent: 50
67
rpm_limit: 4000
78
tpm_limit: 400000
9+
price_input: 5
10+
price_output: 25
11+
price_cache_write: 6.25
12+
price_cache_read: 0.50
813

914
claude-sonnet-4-6:
1015
context_limit: 200000
1116
max_concurrent: 50
1217
rpm_limit: 4000
1318
tpm_limit: 400000
19+
price_input: 3
20+
price_output: 15
21+
price_cache_write: 3.75
22+
price_cache_read: 0.30

contextdb/config/llm/openai.yaml

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,22 @@
11
# OpenAI models configuration
2+
# Pricing in USD per million tokens
23

3-
gpt-4:
4-
context_limit: 8192
5-
max_concurrent: 10
6-
rpm_limit: 500
7-
tpm_limit: 30000
8-
9-
gpt-4-turbo:
4+
gpt-4o:
105
context_limit: 128000
116
max_concurrent: 20
127
rpm_limit: 500
138
tpm_limit: 150000
9+
price_input: 2.50
10+
price_output: 10
11+
price_cache_write: 2.50
12+
price_cache_read: 1.25
1413

15-
gpt-4o:
14+
gpt-4o-mini:
1615
context_limit: 128000
17-
max_concurrent: 20
16+
max_concurrent: 50
1817
rpm_limit: 500
1918
tpm_limit: 150000
20-
21-
gpt-3.5-turbo:
22-
context_limit: 16385
23-
max_concurrent: 50
24-
rpm_limit: 3500
25-
tpm_limit: 90000
19+
price_input: 0.15
20+
price_output: 0.60
21+
price_cache_write: 0.15
22+
price_cache_read: 0.075

0 commit comments

Comments
 (0)