1212load_dotenv (dotenv_path = Path (__file__ ).parents [2 ] / ".env" , override = True )
1313
1414from .dataset import REGISTRY as DATASET_REGISTRY , get_dataset
15- from .llm import REGISTRY as LLM_REGISTRY , get_llm , get_answer_llm
15+ from .llm import REGISTRY as LLM_REGISTRY , get_answer_llm
1616from .memory import REGISTRY as MEMORY_REGISTRY , get_memory_provider
1717from .modes import REGISTRY as MODE_REGISTRY , get_mode
1818from .runner import EvalRunner
2222console = Console ()
2323
2424
25- def _resolve_gemini_key () -> None :
26- key = os .environ .get ("GEMINI_API_KEY" ) or os .environ .get ("GOOGLE_API_KEY" )
27- if not key :
28- typer .echo ("Error: GEMINI_API_KEY environment variable is not set." , err = True )
# API-key environment variables accepted for each known LLM provider, in
# lookup order. The first name is the one reported in error messages.
_PROVIDER_KEY_VARS: dict[str, tuple[str, ...]] = {
    "anthropic": ("ANTHROPIC_API_KEY",),
    "gemini": ("GEMINI_API_KEY", "GOOGLE_API_KEY"),
    "groq": ("GROQ_API_KEY",),
    "openai": ("OPENAI_API_KEY",),
}


def _ensure_provider_env(provider: str, role: str) -> None:
    """Check that *provider* is registered and that its API key is set.

    Args:
        provider: LLM provider name; must be a key of ``LLM_REGISTRY``.
        role: Human-readable role of the provider (e.g. ``"Answer"`` or
            ``"Judge"``); used only in error messages.

    Raises:
        typer.Exit: With exit code 1, after printing an error to stderr,
            when the provider is not registered or none of its accepted
            API-key environment variables is set to a non-empty value.

    Side effects:
        For ``"gemini"``, the resolved key (``GEMINI_API_KEY`` first, then
        ``GOOGLE_API_KEY``) is written to ``os.environ["GOOGLE_API_KEY"]``.
    """
    if provider not in LLM_REGISTRY:
        typer.echo(
            f"Error: unknown {role.lower()} LLM provider '{provider}'. "
            f"Available: {', '.join(LLM_REGISTRY)}.",
            err=True,
        )
        raise typer.Exit(1)

    key_vars = _PROVIDER_KEY_VARS.get(provider)
    if key_vars is None:
        # Registered provider with no known key requirement: nothing to check.
        return

    # First non-empty value among the accepted variables, in lookup order.
    key = next(filter(None, map(os.environ.get, key_vars)), None)
    if not key:
        typer.echo(
            f"Error: {role} LLM provider '{provider}' requires {key_vars[0]}.",
            err=True,
        )
        raise typer.Exit(1)

    if provider == "gemini":
        # Mirror the resolved key under GOOGLE_API_KEY, whichever of the two
        # accepted variables supplied it.
        os.environ["GOOGLE_API_KEY"] = key
58+
59+
def _validate_run_env(memory: str, mode: str, answer_provider: str | None = None) -> None:
    """Validate provider selection and required API keys before a run.

    Args:
        memory: Name of the memory provider selected for the run.
        mode: Name of the response mode selected for the run.
        answer_provider: Explicit answer-LLM provider. When not ``None`` it
            is written to ``OMB_ANSWER_LLM`` and so takes precedence over any
            value already present in the environment.

    Raises:
        typer.Exit: With exit code 1, after printing an error to stderr,
            when a provider is unknown or lacks its API key, when
            ``agentic-rag`` mode is combined with a non-Gemini answer
            provider, or when the ``hindsight`` memory provider is selected
            without a Gemini/Google API key.

    Side effects:
        May set ``OMB_ANSWER_LLM`` and ``GOOGLE_API_KEY`` in ``os.environ``.
    """
    # An explicit choice wins over whatever the environment already holds.
    if answer_provider is not None:
        os.environ["OMB_ANSWER_LLM"] = answer_provider

    answer = os.environ.get("OMB_ANSWER_LLM", "groq")
    judge = os.environ.get("OMB_JUDGE_LLM", "gemini")
    for provider, role in ((answer, "Answer"), (judge, "Judge")):
        _ensure_provider_env(provider, role)

    if mode == "agentic-rag" and answer != "gemini":
        typer.echo(
            "Error: response mode 'agentic-rag' requires a tool-capable LLM provider; "
            f"'{answer}' is not supported.",
            err=True,
        )
        raise typer.Exit(1)

    if memory != "hindsight":
        return

    # The hindsight memory provider needs a Gemini key of its own, regardless
    # of which providers were chosen for answering and judging.
    key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
    if not key:
        typer.echo(
            "Error: memory provider 'hindsight' requires GEMINI_API_KEY for embedded extraction.",
            err=True,
        )
        raise typer.Exit(1)
    os.environ["GOOGLE_API_KEY"] = key
3182
3283
3384@app .command ()
@@ -36,7 +87,7 @@ def run(
3687 dataset : str = typer .Option ("tempo" , "--dataset" , help = f"Dataset. Available: { ', ' .join (DATASET_REGISTRY )} " ),
3788 memory : str = typer .Option ("bm25" , "--memory" , "-m" , help = f"Memory provider. Available: { ', ' .join (MEMORY_REGISTRY )} " ),
3889 mode : str = typer .Option ("rag" , "--mode" , help = f"Response mode. Available: { ', ' .join (MODE_REGISTRY )} " ),
39- llm : str = typer .Option ("gemini" , "--llm" , help = f"LLM for answer generation. Available: { ', ' .join (LLM_REGISTRY )} " ),
90+ llm : str | None = typer .Option (None , "--llm" , help = f"LLM provider for answer generation. Overrides OMB_ANSWER_LLM . Available: { ', ' .join (LLM_REGISTRY )} " ),
4091 category : str = typer .Option (None , "--category" , "-c" , help = "Category filter(s), comma-separated (e.g. 'a,b,c'). With --query-limit, runs N queries per category." ),
4192 query_limit : int = typer .Option (None , "--query-limit" , "-q" , help = "Max queries to evaluate. When combined with multiple --category values, applies per category." ),
4293 query_id : str = typer .Option (None , "--query-id" , help = "Run a single specific query by ID" ),
@@ -53,7 +104,7 @@ def run(
53104 description : str = typer .Option (None , "--description" , "-d" , help = "Optional description for this run (stored in the result JSON)" ),
54105) -> None :
55106 """Run an evaluation on a single split (optionally filtered to a category)."""
56- _resolve_gemini_key ( )
107+ _validate_run_env ( memory , mode , llm )
57108
58109 ds = get_dataset (dataset )
59110
0 commit comments