|
31 | 31 | from contextdb.api.condb import ConDB |
32 | 32 | from contextdb.retriever.algorithm.beam_retriever import BeamRetriever |
33 | 33 | from contextdb.retriever.algorithm.block_retriever import BlockRetriever |
34 | | -from contextdb.retriever.algorithm.ranker import BM25PathRanker |
| 34 | +from contextdb.retriever.algorithm.ranker import make_ranker |
35 | 35 |
|
36 | 36 | DEFAULT_MODEL = "claude-sonnet-4-6" |
37 | 37 | DEFAULT_DATA_DIR = Path("data/swebench_pathonly") |
38 | 38 |
|
39 | 39 |
|
40 | 40 | def make_filesystem_retriever(db: ConDB, args, node_count: int): |
41 | | - ranker = BM25PathRanker() if args.ranker == "bm25" else None |
42 | 41 | strategy = args.strategy |
43 | 42 | if strategy == "auto": |
44 | 43 | strategy = "beam" if node_count <= 50 else "block" |
45 | 44 | if strategy == "beam": |
46 | 45 | return BeamRetriever(db.storage, db._llm, mode="filesystem") |
47 | 46 | if strategy == "block": |
| 47 | + ranker = make_ranker( |
| 48 | + args.ranker, |
| 49 | + embedding_provider=args.embedding_provider, |
| 50 | + embedding_model=args.embedding_model, |
| 51 | + embedding_api_key=args.embedding_api_key, |
| 52 | + ) |
48 | 53 | return BlockRetriever( |
49 | 54 | db.storage, |
50 | 55 | db._llm, |
@@ -224,6 +229,8 @@ def run(args): |
224 | 229 | "top_k": args.top_k, |
225 | 230 | "strategy": args.strategy, |
226 | 231 | "ranker": args.ranker, |
| 232 | + "embedding_provider": args.embedding_provider if args.ranker == "vector" else None, |
| 233 | + "embedding_model": args.embedding_model if args.ranker == "vector" else None, |
227 | 234 | "limit": args.limit, |
228 | 235 | "num_queries": len(queries), |
229 | 236 | "num_snapshots": len(by_snap), |
@@ -476,8 +483,11 @@ def main(): |
476 | 483 | p.add_argument("--provider", default="anthropic") |
477 | 484 | p.add_argument("--top-k", type=int, default=10) |
478 | 485 | p.add_argument("--strategy", choices=["auto", "beam", "block"], default="auto") |
479 | | - p.add_argument("--ranker", choices=["bm25", "none"], default="none", |
| 486 | + p.add_argument("--ranker", choices=["bm25", "vector", "none"], default="none", |
480 | 487 | help="Optional path ordering for Block merge results") |
| 488 | + p.add_argument("--embedding-provider", default="openai") |
| 489 | + p.add_argument("--embedding-model", default="text-embedding-3-small") |
| 490 | + p.add_argument("--embedding-api-key", default=None) |
481 | 491 | p.add_argument("--max-parallel-blocks", type=int, default=None) |
482 | 492 | p.add_argument("--max-turns", type=int, default=None) |
483 | 493 | p.add_argument("--limit", type=int, default=0, help="0 = all") |
|
0 commit comments