diff --git a/docs/libraries/nemo-evaluator-launcher/configuration/executors/local.md b/docs/libraries/nemo-evaluator-launcher/configuration/executors/local.md index df2f16b15..c25de8f2b 100644 --- a/docs/libraries/nemo-evaluator-launcher/configuration/executors/local.md +++ b/docs/libraries/nemo-evaluator-launcher/configuration/executors/local.md @@ -2,13 +2,13 @@ # Local Executor -The Local executor runs evaluations on your machine using Docker. It provides a fast way to iterate if you have Docker installed, evaluating existing endpoints. +The Local executor runs evaluations on your machine. By default it uses Docker containers, and it can also run evaluations directly on the host (`execution.use_docker: false`). See common concepts and commands in {ref}`executors-overview`. ## Prerequisites -- Docker +- Docker (required only when `execution.use_docker: true`, which is the default) - Python environment with the NeMo Evaluator Launcher CLI available (install the launcher by following {ref}`gs-install`) ## Quick Start @@ -30,6 +30,24 @@ nemo-evaluator-launcher run --config packages/nemo-evaluator-launcher/examples/l -o target.api_endpoint.api_key_name=NGC_API_KEY ``` +### Run without Docker containers + +```bash +nemo-evaluator-launcher run --config packages/nemo-evaluator-launcher/examples/local_basic.yaml \ + --no-docker \ + -o target.api_endpoint.api_key_name=NGC_API_KEY +``` + +Equivalent YAML: + +```yaml +execution: + type: local + use_docker: false +``` + +When using `use_docker: false`, the requested benchmark task must be available from locally installed NeMo Evaluator packages (harness wheels). The launcher validates this before execution and fails early if the harness/task is not installed. This mode is supported only with `deployment.type: none`. + ## Environment Variables and Secrets Environment variables use the unified prefix syntax (`$host:`, `$lit:`, `$runtime:`) described in {ref}`env-vars-configuration`. Declare them at the top-level `env_vars:` section, at `evaluation.env_vars`, or per-task. 
Secret values are stored in a `.secrets.env` file alongside the generated `run.sh` and sourced at runtime — they never appear in the script itself. @@ -58,6 +76,7 @@ The Local executor uses Docker volume mounts for data persistence: You can customize your local executor by specifying `extra_docker_args`. This parameter allows you to pass any flag to the `docker run` command that is executed by the NeMo Evaluator Launcher. You can use it to mount additional volumes, set environment variables or customize your network settings. +`extra_docker_args` is ignored when `execution.use_docker: false`. For example, if you would like your job to use a specific docker network, you can specify: diff --git a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/cli/run.py b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/cli/run.py index 9b2ef628e..dcf5c8ae3 100644 --- a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/cli/run.py +++ b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/cli/run.py @@ -94,6 +94,13 @@ class Cmd: "If not specified, loads $PWD/.env if it exists." }, ) + no_docker: bool = field( + default=False, + alias=["--no-docker"], + metadata={ + "help": "Run local executor tasks directly on host without launching Docker containers. Equivalent to setting execution.use_docker=false." + }, + ) def _parse_requested_tasks(self) -> list[str]: """Parse -t arguments into a list of task names. @@ -207,6 +214,16 @@ def execute(self) -> None: hydra_overrides=self.override, ) + if self.no_docker: + if config.execution.type != "local": + raise ValueError( + "--no-docker is only supported with execution.type=local." 
+ ) + is_struct = OmegaConf.is_struct(config) + OmegaConf.set_struct(config, False) + config.execution.use_docker = False + OmegaConf.set_struct(config, is_struct) + # Apply task filtering if -t is specified if requested_tasks: config = filter_tasks(config, requested_tasks) diff --git a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/common/helpers.py b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/common/helpers.py index 6643fb26a..f2b4a6a4d 100644 --- a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/common/helpers.py +++ b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/common/helpers.py @@ -16,6 +16,7 @@ import base64 import datetime import os +import shlex from dataclasses import dataclass from typing import Optional @@ -61,8 +62,9 @@ def _str_to_echo_command(str_to_save: str, filename: str) -> CmdAndReadableComme debug_str = "\n".join( [f"# Contents of {filename}"] + ["# " + s for s in str_to_save.splitlines()] ) + quoted_filename = shlex.quote(filename) return CmdAndReadableComment( - cmd=f'echo "{str_to_save_b64}" | base64 -d > {filename}', debug=debug_str + cmd=f'echo "{str_to_save_b64}" | base64 -d > {quoted_filename}', debug=debug_str ) @@ -167,6 +169,7 @@ def get_eval_factory_command( cfg: DictConfig, user_task_config: DictConfig, task_definition: dict, + output_dir: str = CONTAINER_RESULTS_DIR, ) -> CmdAndReadableComment: # This gets the eval_factory_config merged from both top-level and task-level. 
merged_nemo_evaluator_config = get_eval_factory_config( @@ -214,7 +217,7 @@ def get_eval_factory_command( _set_nested_optionally_overriding( merged_nemo_evaluator_config, ["config", "output_dir"], - CONTAINER_RESULTS_DIR, + output_dir, ) api_key_name = get_api_key_name(cfg) if api_key_name: @@ -275,7 +278,7 @@ def get_eval_factory_command( if config_path: create_unresolved_config_cmd = _str_to_echo_command( open(config_path, "r").read(), - filename=f"{CONTAINER_RESULTS_DIR}/launcher_unresolved_config.yaml", + filename=f"{output_dir}/launcher_unresolved_config.yaml", ) commands.append(create_unresolved_config_cmd.cmd) debug.append(create_unresolved_config_cmd.debug) diff --git a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/configs/execution/local.yaml b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/configs/execution/local.yaml index b025e5833..5f0a20f49 100644 --- a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/configs/execution/local.yaml +++ b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/configs/execution/local.yaml @@ -15,5 +15,6 @@ # type: local output_dir: ??? 
+use_docker: true extra_docker_args: "" mode: sequential diff --git a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/executors/local/executor.py b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/executors/local/executor.py index 1d620e617..4057ebfb2 100644 --- a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/executors/local/executor.py +++ b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/executors/local/executor.py @@ -23,6 +23,7 @@ import platform import shlex import shutil +import signal import subprocess import time from typing import Iterator, List, Optional, Tuple, Union @@ -67,6 +68,61 @@ from nemo_evaluator_launcher.executors.registry import register_executor +def _get_local_available_tasks() -> dict[str, set[str]]: + """Return locally installed NeMo Evaluator tasks grouped by harness.""" + try: + from nemo_evaluator.api import get_available_evaluations + except ImportError as e: + raise RuntimeError( + "execution.use_docker=false requires `nemo-evaluator` to be installed locally. " + "Install nemo-evaluator (with the harness/task wheels you need), or enable Docker execution." + ) from e + + framework_task_mapping, _, _ = get_available_evaluations() + return { + framework: set(tasks.keys()) + for framework, tasks in framework_task_mapping.items() + } + + +def _validate_task_available_locally( + *, + task_query: str, + task_definition: dict, + available_tasks_by_harness: dict[str, set[str]], +) -> None: + """Validate that a task exists in locally installed NeMo Evaluator packages.""" + harness_name = str(task_definition.get("harness") or "") + task_name = str(task_definition.get("task") or "") + + if harness_name: + harness_tasks = available_tasks_by_harness.get(harness_name) + if harness_tasks is None: + available_harnesses = sorted(available_tasks_by_harness.keys()) + raise ValueError( + f"Task '{task_query}' requires harness '{harness_name}', but this harness is not installed locally. 
" + f"Installed harnesses: {available_harnesses or ['']}. " + "Install the corresponding NeMo Evaluator wheel, or run with Docker." + ) + if task_name not in harness_tasks: + available_tasks = sorted(harness_tasks) + raise ValueError( + f"Task '{task_query}' is not available in installed harness '{harness_name}'. " + f"Available tasks in this harness: {available_tasks or ['']}. " + "Install a wheel that contains this task, or run with Docker." + ) + return + + matching_harnesses = [ + harness for harness, tasks in available_tasks_by_harness.items() if task_name in tasks + ] + if not matching_harnesses: + raise ValueError( + f"Task '{task_query}' is not available in locally installed NeMo Evaluator packages. " + "Install a wheel that contains this task, or run with Docker." + ) + + @register_executor("local") class LocalExecutor(BaseExecutor): @classmethod @@ -83,12 +139,21 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str: Raises: RuntimeError: If the run script fails. """ + use_docker = bool(cfg.execution.get("use_docker", True)) + # Check if docker is available (skip in dry_run mode) - if not dry_run and shutil.which("docker") is None: + if use_docker and not dry_run and shutil.which("docker") is None: raise RuntimeError( "Docker is not installed or not in PATH. " "Please install Docker to run local evaluations." ) + if not use_docker and cfg.deployment.type != "none": + raise ValueError( + "execution.use_docker=false is only supported with deployment.type=none." 
+ ) + local_available_tasks: dict[str, set[str]] | None = None + if not use_docker: + local_available_tasks = _get_local_available_tasks() # Generate invocation ID for this evaluation run invocation_id = generate_invocation_id() @@ -136,6 +201,12 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str: container=task.get("container"), endpoint_type=task.get("endpoint_type"), ) + if not use_docker: + _validate_task_available_locally( + task_query=task.name, + task_definition=task_definition, + available_tasks_by_harness=local_available_tasks or {}, + ) # Track unlisted tasks for safeguard check if task_definition.get("is_unlisted", False): @@ -194,11 +265,15 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str: dataset_mount_container = None dataset_env_var_value = None if "dataset_dir" in task: - dataset_mount_host = task["dataset_dir"] - # Get container mount path (default to /datasets if not specified) - dataset_mount_container = task.get("dataset_mount_path", "/datasets") - # Set NEMO_EVALUATOR_DATASET_DIR to the container mount path - dataset_env_var_value = dataset_mount_container + if use_docker: + dataset_mount_host = task["dataset_dir"] + # Get container mount path (default to /datasets if not specified) + dataset_mount_container = task.get("dataset_mount_path", "/datasets") + # Set NEMO_EVALUATOR_DATASET_DIR to the container mount path + dataset_env_var_value = dataset_mount_container + else: + # In no-docker mode, pass dataset_dir directly to local process. 
+ dataset_env_var_value = task["dataset_dir"] # Build env_groups for secrets file generation env_groups = {} @@ -225,7 +300,12 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str: task_output_dir = output_dir / task.name task_output_dir.mkdir(parents=True, exist_ok=True) eval_factory_command_struct = get_eval_factory_command( - cfg, task, task_definition + cfg, + task, + task_definition, + output_dir=( + "/results" if use_docker else str(task_output_dir / "artifacts") + ), ) eval_factory_command = eval_factory_command_struct.cmd # The debug comment for placing into the script and easy debug. Reason @@ -257,6 +337,7 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str: "dataset_mount_host": dataset_mount_host, "dataset_mount_container": dataset_mount_container, "dataset_env_var_value": dataset_env_var_value, + "run_with_docker": use_docker, } evaluation_tasks.append(evaluation_task) @@ -271,6 +352,7 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str: evaluation_tasks=[evaluation_task], auto_export_destinations=auto_export_destinations, extra_docker_args=extra_docker_args, + has_docker_tasks=use_docker, ).rstrip("\n") + "\n" ) @@ -288,6 +370,7 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str: evaluation_tasks=evaluation_tasks, auto_export_destinations=auto_export_destinations, extra_docker_args=extra_docker_args, + has_docker_tasks=use_docker, ).rstrip("\n") + "\n" ) @@ -386,8 +469,13 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str: executor="local", data={ "output_dir": str(evaluation_task["output_dir"]), - "container": evaluation_task["client_container_name"], + "container": ( + evaluation_task["client_container_name"] + if use_docker + else "" + ), "eval_image": evaluation_task["eval_image"], + "use_docker": use_docker, }, config=OmegaConf.to_object(cfg), ) @@ -711,33 +799,46 @@ def kill_job(job_id: str) -> None: f"Job {job_id} is not a local job (executor: 
{job_data.executor})" ) - # Get container name from database - container_name = job_data.data.get("container") - if not container_name: - raise ValueError(f"No container name found for job {job_id}") + use_docker = bool(job_data.data.get("use_docker", True)) + output_dir = pathlib.Path(job_data.data.get("output_dir", "")) + container_name = job_data.data.get("container") or "" killed_something = False - # First, try to stop the Docker container if it's running - result = subprocess.run( - shlex.split(f"docker stop {container_name}"), - capture_output=True, - text=True, - timeout=30, - ) - if result.returncode == 0: - killed_something = True - # Don't raise error if container doesn't exist (might be still pulling) - - # Find and kill Docker processes for this container - result = subprocess.run( - shlex.split(f"pkill -f 'docker run.*{container_name}'"), - capture_output=True, - text=True, - timeout=10, - ) - if result.returncode == 0: - killed_something = True + # Try to stop script process group if a pid file is present. 
+ pid_file = output_dir / "logs" / "stage.pid" + if pid_file.exists(): + try: + pid = int(pid_file.read_text().strip()) + if hasattr(os, "killpg"): + os.killpg(pid, signal.SIGTERM) + else: + os.kill(pid, signal.SIGTERM) + killed_something = True + except (OSError, ValueError): + pass + + if use_docker and container_name: + # First, try to stop the Docker container if it's running + result = subprocess.run( + shlex.split(f"docker stop {container_name}"), + capture_output=True, + text=True, + timeout=30, + ) + if result.returncode == 0: + killed_something = True + # Don't raise error if container doesn't exist (might be still pulling) + + # Find and kill Docker processes for this container + result = subprocess.run( + shlex.split(f"pkill -f 'docker run.*{container_name}'"), + capture_output=True, + text=True, + timeout=10, + ) + if result.returncode == 0: + killed_something = True # If we successfully killed something, mark as killed if killed_something: @@ -758,7 +859,13 @@ def kill_job(job_id: str) -> None: # Use common helper to get informative error message based on job status current_status = status_list[0].state if status_list else None error_msg = LocalExecutor.get_kill_failure_message( - job_id, f"container: {container_name}", current_status + job_id, + ( + f"container: {container_name}" + if container_name + else f"pid_file: {pid_file}" + ), + current_status, ) raise RuntimeError(error_msg) diff --git a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/executors/local/run.template.sh b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/executors/local/run.template.sh index 76f048cd9..d5575c54c 100644 --- a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/executors/local/run.template.sh +++ b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/executors/local/run.template.sh @@ -14,8 +14,10 @@ # limitations under the License. 
-# check if docker exists +# check if docker exists when any task uses docker +{% if has_docker_tasks %} command -v docker >/dev/null 2>&1 || { echo 'docker not found'; exit 1; } +{% endif %} # Initialize: remove killed jobs file from previous runs script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" @@ -57,14 +59,17 @@ else # Debug contents of the eval factory command's config {{ task.eval_factory_command_debug_comment | indent(4) }} - # Docker run with eval factory command + # Execute evaluation task ( {% if task.secrets_env_content -%} - # Source secrets (scoped to subshell); re-exports happen before each docker run + # Source secrets (scoped to subshell) source "$task_dir/.secrets.env" {% endif -%} + echo "$$" > "$logs_dir/stage.pid" + trap 'rm -f "$logs_dir/stage.pid"' EXIT echo "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "$logs_dir/stage.running" + {% if task.run_with_docker %} {% if task.deployment %} {% if task.deployment_reexport_cmd -%} # Re-export deployment env vars to original names @@ -126,9 +131,19 @@ else echo "Container completed successfully" >&2; exit 0; ' > "$logs_dir/client_stdout.log" 2>&1 - exit_code=$? + {% else %} + {% if task.eval_reexport_cmd -%} + # Re-export eval env vars to original names + {{ task.eval_reexport_cmd }} + {% endif -%} + {% if task.dataset_env_var_value -%} + export NEMO_EVALUATOR_DATASET_DIR="{{ task.dataset_env_var_value }}" + {% endif -%} + {{ task.eval_factory_command }} > "$logs_dir/client_stdout.log" 2>&1 + {% endif %} + exit_code=$? 
- {% if task.deployment %} + {% if task.run_with_docker and task.deployment %} # Stop the server docker stop $SERVER_CONTAINER_NAME 2>/dev/null || true {% endif %} diff --git a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/resources/config_templates/execution/local.yaml b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/resources/config_templates/execution/local.yaml index a1f4cc946..9c3812f4c 100644 --- a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/resources/config_templates/execution/local.yaml +++ b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/resources/config_templates/execution/local.yaml @@ -3,3 +3,4 @@ defaults: execution: output_dir: nel-results + use_docker: true diff --git a/packages/nemo-evaluator-launcher/tests/unit_tests/test_cli_integration.py b/packages/nemo-evaluator-launcher/tests/unit_tests/test_cli_integration.py index 5a2aa7972..2dc8f810d 100644 --- a/packages/nemo-evaluator-launcher/tests/unit_tests/test_cli_integration.py +++ b/packages/nemo-evaluator-launcher/tests/unit_tests/test_cli_integration.py @@ -819,3 +819,49 @@ def test_config_parameter_with_various_extensions( run_cmd.execute() call_kwargs = mock_compose.call_args.kwargs assert call_kwargs["config_name"] == "test_config" + + def test_no_docker_flag_sets_execution_use_docker_false( + self, mock_execdb, mock_api_endpoint_check, mock_print + ): + config_dict = { + "deployment": {"type": "none"}, + "execution": {"type": "local", "output_dir": "/tmp/test_output"}, + "target": { + "api_endpoint": {"api_key_name": "test_key", "model_id": "test_model"} + }, + "evaluation": {"tasks": [{"name": "test_task_1"}]}, + } + + with ( + patch("nemo_evaluator_launcher.api.types.hydra.compose") as mock_compose, + patch("nemo_evaluator_launcher.api.functional.run_eval") as mock_run_eval, + ): + mock_compose.return_value = OmegaConf.create(config_dict) + mock_run_eval.return_value = None + + run_cmd = RunCmd(no_docker=True, dry_run=True) + 
run_cmd.execute() + + called_cfg = mock_run_eval.call_args.args[0] + assert called_cfg.execution.use_docker is False + + def test_no_docker_flag_rejects_non_local_executor( + self, mock_execdb, mock_api_endpoint_check, mock_print + ): + config_dict = { + "deployment": {"type": "none"}, + "execution": {"type": "dummy", "output_dir": "/tmp/test_output"}, + "target": { + "api_endpoint": {"api_key_name": "test_key", "model_id": "test_model"} + }, + "evaluation": {"tasks": [{"name": "test_task_1"}]}, + } + + with patch("nemo_evaluator_launcher.api.types.hydra.compose") as mock_compose: + mock_compose.return_value = OmegaConf.create(config_dict) + + with pytest.raises( + ValueError, + match="--no-docker is only supported with execution.type=local", + ): + RunCmd(no_docker=True, dry_run=True).execute() diff --git a/packages/nemo-evaluator-launcher/tests/unit_tests/test_get_eval_factory_command.py b/packages/nemo-evaluator-launcher/tests/unit_tests/test_get_eval_factory_command.py index 1812d270e..1f3424178 100644 --- a/packages/nemo-evaluator-launcher/tests/unit_tests/test_get_eval_factory_command.py +++ b/packages/nemo-evaluator-launcher/tests/unit_tests/test_get_eval_factory_command.py @@ -93,3 +93,34 @@ def test_get_eval_factory_command_basic(monkeypatch): # The command to run eval is present assert "&& $cmd run_eval --run_config config_ef.yaml" in result.cmd + + +def test_get_eval_factory_command_custom_output_dir(): + cfg = OmegaConf.create( + { + "evaluation": {"nemo_evaluator_config": {"config": {}}}, + "deployment": {"type": "none"}, + "target": { + "api_endpoint": { + "url": "https://example.test/api", + "model_id": "model-123", + "api_key_name": "MY_API_KEY", + } + }, + } + ) + user_task_config = OmegaConf.create({"nemo_evaluator_config": {"config": {}}}) + task_definition = {"endpoint_type": "chat", "task": "my_task"} + + result = get_eval_factory_command( + cfg, + user_task_config, + task_definition, + output_dir="/tmp/nel/results", + ) + + b64 = 
_extract_b64_from_echo_cmd(result.cmd) + decoded_yaml = base64.b64decode(b64.encode("utf-8")).decode("utf-8") + merged = yaml.safe_load(decoded_yaml) + + assert merged["config"]["output_dir"] == "/tmp/nel/results" diff --git a/packages/nemo-evaluator-launcher/tests/unit_tests/test_local_executor.py b/packages/nemo-evaluator-launcher/tests/unit_tests/test_local_executor.py index 103164d01..00e82305e 100644 --- a/packages/nemo-evaluator-launcher/tests/unit_tests/test_local_executor.py +++ b/packages/nemo-evaluator-launcher/tests/unit_tests/test_local_executor.py @@ -261,6 +261,134 @@ def mock_get_task_def_side_effect(*_args, **kwargs): if env_var in os.environ: del os.environ[env_var] + def test_execute_eval_dry_run_no_docker_generates_host_commands( + self, sample_config, mock_tasks_mapping + ): + """When execution.use_docker=false, generated scripts should run on host.""" + sample_config.execution.use_docker = False + os.environ["TEST_API_KEY"] = "test_key_value" + os.environ["GLOBAL_VALUE"] = "global_env_value" + os.environ["TASK_VALUE"] = "task_env_value" + + try: + with ( + patch( + "nemo_evaluator_launcher.executors.local.executor.load_tasks_mapping" + ) as mock_load_mapping, + patch( + "nemo_evaluator_launcher.executors.local.executor._get_local_available_tasks" + ) as mock_get_local_tasks, + patch( + "nemo_evaluator_launcher.executors.local.executor.get_task_definition_for_job" + ) as mock_get_task_def, + patch( + "nemo_evaluator_launcher.executors.local.executor.get_eval_factory_command" + ) as mock_get_command, + patch("builtins.print"), + ): + mock_load_mapping.return_value = mock_tasks_mapping + mock_get_local_tasks.return_value = { + "lm-eval": {"test_task_1"}, + "helm": {"test_task_2"}, + } + + def mock_get_task_def_side_effect(*_args, **kwargs): + task_name = kwargs.get("task_query") + mapping = kwargs.get("base_mapping", {}) + for (_harness, name), definition in mapping.items(): + if name == task_name: + return definition + raise KeyError(f"Task 
{task_name} not found") + + mock_get_task_def.side_effect = mock_get_task_def_side_effect + from nemo_evaluator_launcher.common.helpers import CmdAndReadableComment + + mock_get_command.return_value = CmdAndReadableComment( + cmd="nemo-evaluator run_eval --run_config config_ef.yaml", + debug="# Host command", + ) + + invocation_id = LocalExecutor.execute_eval(sample_config, dry_run=True) + + output_base = pathlib.Path(sample_config.execution.output_dir) + output_dir = None + for item in output_base.iterdir(): + if item.is_dir() and item.name.endswith(f"-{invocation_id}"): + output_dir = item + break + assert output_dir is not None + + run_script = (output_dir / "test_task_1" / "run.sh").read_text() + assert "docker run" not in run_script + assert "docker not found" not in run_script + assert ( + 'nemo-evaluator run_eval --run_config config_ef.yaml > "$logs_dir/client_stdout.log" 2>&1' + in run_script + ) + + for call in mock_get_command.call_args_list: + assert call.kwargs["output_dir"].endswith("/artifacts") + finally: + for env_var in ["TEST_API_KEY", "GLOBAL_VALUE", "TASK_VALUE"]: + if env_var in os.environ: + del os.environ[env_var] + + def test_execute_eval_no_docker_with_deployment_raises(self, sample_config): + """No-docker mode only supports deployment.type=none.""" + sample_config.execution.use_docker = False + sample_config.deployment.type = "vllm" + + with pytest.raises( + ValueError, + match="execution.use_docker=false is only supported with deployment.type=none", + ): + LocalExecutor.execute_eval(sample_config, dry_run=True) + + def test_execute_eval_no_docker_missing_local_task_raises( + self, sample_config, mock_tasks_mapping + ): + sample_config.execution.use_docker = False + os.environ["TEST_API_KEY"] = "test_key_value" + os.environ["GLOBAL_VALUE"] = "global_env_value" + os.environ["TASK_VALUE"] = "task_env_value" + + try: + with ( + patch( + "nemo_evaluator_launcher.executors.local.executor.load_tasks_mapping" + ) as mock_load_mapping, + patch( + 
"nemo_evaluator_launcher.executors.local.executor._get_local_available_tasks" + ) as mock_get_local_tasks, + patch( + "nemo_evaluator_launcher.executors.local.executor.get_task_definition_for_job" + ) as mock_get_task_def, + ): + mock_load_mapping.return_value = mock_tasks_mapping + mock_get_local_tasks.return_value = { + "lm-eval": {"some_other_task"}, + "helm": {"test_task_2"}, + } + + def mock_get_task_def_side_effect(*_args, **kwargs): + task_name = kwargs.get("task_query") + mapping = kwargs.get("base_mapping", {}) + for (_harness, name), definition in mapping.items(): + if name == task_name: + return definition + raise KeyError(f"Task {task_name} not found") + + mock_get_task_def.side_effect = mock_get_task_def_side_effect + + with pytest.raises( + ValueError, match="not available in installed harness" + ): + LocalExecutor.execute_eval(sample_config, dry_run=True) + finally: + for env_var in ["TEST_API_KEY", "GLOBAL_VALUE", "TASK_VALUE"]: + if env_var in os.environ: + del os.environ[env_var] + class TestLocalExecutorGetStatus: """Test LocalExecutor get_status functionality."""