-
Notifications
You must be signed in to change notification settings - Fork 46
feat: add local no-docker mode for nemo-evaluator-launcher #786
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
ca28143
ff8c248
07e6372
665f689
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,5 +15,6 @@ | |
| # | ||
| type: local | ||
| output_dir: ??? | ||
| use_docker: true | ||
| extra_docker_args: "" | ||
| mode: sequential | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,6 +23,7 @@ | |
| import platform | ||
| import shlex | ||
| import shutil | ||
| import signal | ||
| import subprocess | ||
| import time | ||
| from typing import Iterator, List, Optional, Tuple, Union | ||
|
|
@@ -67,6 +68,61 @@ | |
| from nemo_evaluator_launcher.executors.registry import register_executor | ||
|
|
||
|
|
||
| def _get_local_available_tasks() -> dict[str, set[str]]: | ||
| """Return locally installed NeMo Evaluator tasks grouped by harness.""" | ||
| try: | ||
| from nemo_evaluator.api import get_available_evaluations | ||
| except ImportError as e: | ||
| raise RuntimeError( | ||
| "execution.use_docker=false requires `nemo-evaluator` to be installed locally. " | ||
| "Install nemo-evaluator (with the harness/task wheels you need), or enable Docker execution." | ||
| ) from e | ||
|
|
||
| framework_task_mapping, _, _ = get_available_evaluations() | ||
| return { | ||
| framework: set(tasks.keys()) | ||
| for framework, tasks in framework_task_mapping.items() | ||
| } | ||
|
|
||
|
|
||
| def _validate_task_available_locally( | ||
| *, | ||
| task_query: str, | ||
| task_definition: dict, | ||
| available_tasks_by_harness: dict[str, set[str]], | ||
| ) -> None: | ||
| """Validate that a task exists in locally installed NeMo Evaluator packages.""" | ||
| harness_name = str(task_definition.get("harness") or "") | ||
| task_name = str(task_definition.get("task") or "") | ||
|
|
||
| if harness_name: | ||
| harness_tasks = available_tasks_by_harness.get(harness_name) | ||
| if harness_tasks is None: | ||
| available_harnesses = sorted(available_tasks_by_harness.keys()) | ||
| raise ValueError( | ||
| f"Task '{task_query}' requires harness '{harness_name}', but this harness is not installed locally. " | ||
| f"Installed harnesses: {available_harnesses or ['<none>']}. " | ||
| "Install the corresponding NeMo Evaluator wheel, or run with Docker." | ||
| ) | ||
| if task_name not in harness_tasks: | ||
| available_tasks = sorted(harness_tasks) | ||
| raise ValueError( | ||
| f"Task '{task_query}' is not available in installed harness '{harness_name}'. " | ||
| f"Available tasks in this harness: {available_tasks or ['<none>']}. " | ||
| "Install a wheel that contains this task, or run with Docker." | ||
| ) | ||
| return | ||
|
|
||
| matching_harnesses = [ | ||
| harness for harness, tasks in available_tasks_by_harness.items() if task_name in tasks | ||
| ] | ||
| if not matching_harnesses: | ||
| raise ValueError( | ||
| f"Task '{task_query}' is not available in locally installed NeMo Evaluator packages. " | ||
| "Install a wheel that contains this task, or run with Docker." | ||
| ) | ||
|
|
||
|
|
||
| @register_executor("local") | ||
| class LocalExecutor(BaseExecutor): | ||
| @classmethod | ||
|
|
@@ -83,12 +139,21 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str: | |
| Raises: | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [Power Review] Per the knowledge document (Monolithic Functions >50 lines): 'AI generates long, flat functions instead of composing smaller units. Functions exceeding ~50 lines are harder to test, review, and maintain.' Based on the diff line numbers, 💡 Suggestion: Extract cohesive blocks into well-named helpers: validation/preconditions ( |
||
| RuntimeError: If the run script fails. | ||
| """ | ||
| use_docker = bool(cfg.execution.get("use_docker", True)) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [Power Review] Per the knowledge document (Geological Layers): 'New code bolted on top of old code without integrating — duplicate data paths, parallel config mechanisms, wrapper-on-wrapper stacking.' The diff adds ~11 💡 Suggestion: Extract Docker-specific and host-specific task-building logic into separate private methods (e.g.,
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [Power Review] 💡 Missing test for boundary between use_docker=true and use_docker=false in kill_job · 80% confidence Per the knowledge document (Missing Test for 'Just Inside / Just Outside' Pair): the 💡 Suggestion: Add test cases in |
||
|
|
||
| # Check if docker is available (skip in dry_run mode) | ||
| if not dry_run and shutil.which("docker") is None: | ||
| if use_docker and not dry_run and shutil.which("docker") is None: | ||
| raise RuntimeError( | ||
| "Docker is not installed or not in PATH. " | ||
| "Please install Docker to run local evaluations." | ||
| ) | ||
| if not use_docker and cfg.deployment.type != "none": | ||
| raise ValueError( | ||
| "execution.use_docker=false is only supported with deployment.type=none." | ||
| ) | ||
| local_available_tasks: dict[str, set[str]] | None = None | ||
| if not use_docker: | ||
| local_available_tasks = _get_local_available_tasks() | ||
|
|
||
| # Generate invocation ID for this evaluation run | ||
| invocation_id = generate_invocation_id() | ||
|
|
@@ -136,6 +201,12 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str: | |
| container=task.get("container"), | ||
| endpoint_type=task.get("endpoint_type"), | ||
| ) | ||
| if not use_docker: | ||
| _validate_task_available_locally( | ||
| task_query=task.name, | ||
| task_definition=task_definition, | ||
| available_tasks_by_harness=local_available_tasks or {}, | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [Power Review] 💡 Defensive At line 208, 💡 Suggestion: Replace |
||
| ) | ||
|
|
||
| # Track unlisted tasks for safeguard check | ||
| if task_definition.get("is_unlisted", False): | ||
|
|
@@ -194,11 +265,15 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str: | |
| dataset_mount_container = None | ||
| dataset_env_var_value = None | ||
| if "dataset_dir" in task: | ||
| dataset_mount_host = task["dataset_dir"] | ||
| # Get container mount path (default to /datasets if not specified) | ||
| dataset_mount_container = task.get("dataset_mount_path", "/datasets") | ||
| # Set NEMO_EVALUATOR_DATASET_DIR to the container mount path | ||
| dataset_env_var_value = dataset_mount_container | ||
| if use_docker: | ||
| dataset_mount_host = task["dataset_dir"] | ||
| # Get container mount path (default to /datasets if not specified) | ||
| dataset_mount_container = task.get("dataset_mount_path", "/datasets") | ||
| # Set NEMO_EVALUATOR_DATASET_DIR to the container mount path | ||
| dataset_env_var_value = dataset_mount_container | ||
| else: | ||
| # In no-docker mode, pass dataset_dir directly to local process. | ||
| dataset_env_var_value = task["dataset_dir"] | ||
|
|
||
| # Build env_groups for secrets file generation | ||
| env_groups = {} | ||
|
|
@@ -225,7 +300,12 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str: | |
| task_output_dir = output_dir / task.name | ||
| task_output_dir.mkdir(parents=True, exist_ok=True) | ||
| eval_factory_command_struct = get_eval_factory_command( | ||
| cfg, task, task_definition | ||
| cfg, | ||
| task, | ||
| task_definition, | ||
| output_dir=( | ||
| "/results" if use_docker else str(task_output_dir / "artifacts") | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [Power Review] The 💡 Suggestion: Import and use the |
||
| ), | ||
| ) | ||
| eval_factory_command = eval_factory_command_struct.cmd | ||
| # The debug comment for placing into the script and easy debug. Reason | ||
|
|
@@ -257,6 +337,7 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str: | |
| "dataset_mount_host": dataset_mount_host, | ||
| "dataset_mount_container": dataset_mount_container, | ||
| "dataset_env_var_value": dataset_env_var_value, | ||
| "run_with_docker": use_docker, | ||
| } | ||
| evaluation_tasks.append(evaluation_task) | ||
|
|
||
|
|
@@ -271,6 +352,7 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str: | |
| evaluation_tasks=[evaluation_task], | ||
| auto_export_destinations=auto_export_destinations, | ||
| extra_docker_args=extra_docker_args, | ||
| has_docker_tasks=use_docker, | ||
| ).rstrip("\n") | ||
| + "\n" | ||
| ) | ||
|
|
@@ -288,6 +370,7 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str: | |
| evaluation_tasks=evaluation_tasks, | ||
| auto_export_destinations=auto_export_destinations, | ||
| extra_docker_args=extra_docker_args, | ||
| has_docker_tasks=use_docker, | ||
| ).rstrip("\n") | ||
| + "\n" | ||
| ) | ||
|
|
@@ -386,8 +469,13 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str: | |
| executor="local", | ||
| data={ | ||
| "output_dir": str(evaluation_task["output_dir"]), | ||
| "container": evaluation_task["client_container_name"], | ||
| "container": ( | ||
| evaluation_task["client_container_name"] | ||
| if use_docker | ||
| else "" | ||
| ), | ||
| "eval_image": evaluation_task["eval_image"], | ||
| "use_docker": use_docker, | ||
| }, | ||
| config=OmegaConf.to_object(cfg), | ||
| ) | ||
|
|
@@ -711,33 +799,46 @@ def kill_job(job_id: str) -> None: | |
| f"Job {job_id} is not a local job (executor: {job_data.executor})" | ||
| ) | ||
|
|
||
| # Get container name from database | ||
| container_name = job_data.data.get("container") | ||
| if not container_name: | ||
| raise ValueError(f"No container name found for job {job_id}") | ||
| use_docker = bool(job_data.data.get("use_docker", True)) | ||
| output_dir = pathlib.Path(job_data.data.get("output_dir", "")) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [Power Review] Per the knowledge document (Unhandled Empty or Null Input): a function receives an empty string and proceeds without a guard. 💡 Suggestion: Validate that
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [Power Review] 💡 Empty-string fallback for output_dir could create relative PID file path · 70% confidence Per the knowledge document (WARNING: Error Message Without Context / ERROR: Null/None Dereference on Fallible Return): 💡 Suggestion: Validate that output_dir_str = job_data.data.get("output_dir", "")
if not output_dir_str:
raise ValueError(f"No output directory found for job {job_id}")
output_dir = pathlib.Path(output_dir_str) |
||
| container_name = job_data.data.get("container") or "" | ||
|
|
||
| killed_something = False | ||
|
|
||
| # First, try to stop the Docker container if it's running | ||
| result = subprocess.run( | ||
| shlex.split(f"docker stop {container_name}"), | ||
| capture_output=True, | ||
| text=True, | ||
| timeout=30, | ||
| ) | ||
| if result.returncode == 0: | ||
| killed_something = True | ||
| # Don't raise error if container doesn't exist (might be still pulling) | ||
|
|
||
| # Find and kill Docker processes for this container | ||
| result = subprocess.run( | ||
| shlex.split(f"pkill -f 'docker run.*{container_name}'"), | ||
| capture_output=True, | ||
| text=True, | ||
| timeout=10, | ||
| ) | ||
| if result.returncode == 0: | ||
| killed_something = True | ||
| # Try to stop script process group if a pid file is present. | ||
| pid_file = output_dir / "logs" / "stage.pid" | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This unfortunately introduces a TOCTOU vulnerability. If anything happens to
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [Power Review] The knowledge document explicitly flags 💡 Suggestion: Remove the try:
pid = int(pid_file.read_text().strip())
if hasattr(os, 'killpg'):
os.killpg(pid, signal.SIGTERM)
else:
os.kill(pid, signal.SIGTERM)
killed_something = True
except (OSError, ValueError):
pass
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [Power Review] Per the knowledge document's 'Temporary File or Directory Not Cleaned Up' rule: a temporary file is created but never deleted, especially on error paths. The kill_job method reads 💡 Suggestion: After the try/except block (around line 819), add cleanup of the PID file regardless of whether the kill succeeded or failed. For example: |
||
| if pid_file.exists(): | ||
| try: | ||
| pid = int(pid_file.read_text().strip()) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [Power Review] 🔴 Missing validation that PID is positive before os.killpg — PID 0 kills caller's process group · 95% confidence Per the knowledge document (Missing Input Validation, CWE-20): data arriving from outside the trust boundary (file read) is used without checking range. The PID is read from a file ( 💡 Suggestion: After parsing the PID, validate it is strictly positive before use: |
||
| if hasattr(os, "killpg"): | ||
| os.killpg(pid, signal.SIGTERM) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [Power Review] Per knowledge doc 'Wrong Variable or Field Referenced' and 'Stale or Cached Value Used After Update': 💡 Suggestion: Either: (1) use
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [Power Review] os.killpg(pgid, sig) sends a signal to an entire process group identified by its process group ID (PGID). The code reads a PID from a file (written by 💡 Suggestion: Either (a) use os.getpgid(pid) to resolve the actual PGID before calling os.killpg, or (b) use os.kill(pid, signal.SIGTERM) to signal the script process directly (though children may survive), or (c) ensure the script is started in its own process group (e.g., subprocess.Popen with start_new_session=True) so that PID == PGID is guaranteed. |
||
| else: | ||
| os.kill(pid, signal.SIGTERM) | ||
| killed_something = True | ||
| except (OSError, ValueError): | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [Power Review] Per the knowledge document (ERROR: Swallowed Exception With No Action): 'An exception is caught and silently ignored. The caller has no way to know the operation failed.' Here, 💡 Suggestion: At minimum, log the exception at debug/warning level so operators can diagnose kill failures. Consider also narrowing the OSError handling — e.g., only silencing
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [Power Review] The knowledge document states: 'Never swallow exceptions without logging or re-raising.' While the exceptions caught here are specific (OSError, ValueError) rather than bare, they are silently discarded with 💡 Suggestion: Add a debug/warning log for the caught exceptions. For example: |
||
| pass | ||
|
|
||
| if use_docker and container_name: | ||
| # First, try to stop the Docker container if it's running | ||
| result = subprocess.run( | ||
| shlex.split(f"docker stop {container_name}"), | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [Power Review] Per knowledge doc 'Injection: Untrusted Data in Executable Context' (CWE-78): 💡 Suggestion: Use an explicit argument list instead of |
||
| capture_output=True, | ||
| text=True, | ||
| timeout=30, | ||
| ) | ||
| if result.returncode == 0: | ||
| killed_something = True | ||
| # Don't raise error if container doesn't exist (might be still pulling) | ||
|
|
||
| # Find and kill Docker processes for this container | ||
| result = subprocess.run( | ||
| shlex.split(f"pkill -f 'docker run.*{container_name}'"), | ||
| capture_output=True, | ||
| text=True, | ||
| timeout=10, | ||
| ) | ||
| if result.returncode == 0: | ||
| killed_something = True | ||
|
|
||
| # If we successfully killed something, mark as killed | ||
| if killed_something: | ||
|
|
@@ -758,7 +859,13 @@ def kill_job(job_id: str) -> None: | |
| # Use common helper to get informative error message based on job status | ||
| current_status = status_list[0].state if status_list else None | ||
| error_msg = LocalExecutor.get_kill_failure_message( | ||
| job_id, f"container: {container_name}", current_status | ||
| job_id, | ||
| ( | ||
| f"container: {container_name}" | ||
| if container_name | ||
| else f"pid_file: {pid_file}" | ||
| ), | ||
| current_status, | ||
| ) | ||
| raise RuntimeError(error_msg) | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
[Power Review] 💡 Missing defensive handling when both harness and task name are empty in _validate_task_available_locally · 70% confidence
Per the knowledge document (Lack of Defensive default/else Branch): the function handles two cases —
`harness_name` is truthy (lines 98-114) and `harness_name` is falsy (lines 116-123). In the falsy branch, if `task_name` is also empty (both `harness` and `task` keys are missing or empty in `task_definition`), the code searches for `""` across all harnesses and produces a generic error message. A more explicit guard at the top of the function would provide a clearer, earlier error message for this malformed-input boundary. 💡 Suggestion: Add an early guard:
`if not harness_name and not task_name: raise ValueError(f"Task '{task_query}' has no harness or task name defined in its task definition.")` before the main branching logic.