diff --git a/docs/libraries/nemo-evaluator-launcher/configuration/executors/local.md b/docs/libraries/nemo-evaluator-launcher/configuration/executors/local.md
index df2f16b15..c25de8f2b 100644
--- a/docs/libraries/nemo-evaluator-launcher/configuration/executors/local.md
+++ b/docs/libraries/nemo-evaluator-launcher/configuration/executors/local.md
@@ -2,13 +2,13 @@
 
 # Local Executor
 
-The Local executor runs evaluations on your machine using Docker. It provides a fast way to iterate if you have Docker installed, evaluating existing endpoints.
+The Local executor runs evaluations on your machine. By default it uses Docker containers; it can also run evaluations directly on the host as regular processes, without Docker (`execution.use_docker: false`).
 
 See common concepts and commands in {ref}`executors-overview`.
 
 ## Prerequisites
 
-- Docker
+- Docker (required only when `execution.use_docker: true`, which is the default)
 - Python environment with the NeMo Evaluator Launcher CLI available (install the launcher by following {ref}`gs-install`)
 
 ## Quick Start
@@ -30,6 +30,24 @@ nemo-evaluator-launcher run --config packages/nemo-evaluator-launcher/examples/l
   -o target.api_endpoint.api_key_name=NGC_API_KEY
 ```
 
+### Run without Docker containers
+
+```bash
+nemo-evaluator-launcher run --config packages/nemo-evaluator-launcher/examples/local_basic.yaml \
+  --no-docker \
+  -o target.api_endpoint.api_key_name=NGC_API_KEY
+```
+
+Equivalent YAML:
+
+```yaml
+execution:
+  type: local
+  use_docker: false
+```
+
+When using `use_docker: false`, the requested benchmark task must be available from locally installed NeMo Evaluator packages (harness wheels). The launcher validates this before execution and fails early if the harness or task is not installed. This mode is supported only with `deployment.type: none`.
+
 ## Environment Variables and Secrets
 
 Environment variables use the unified prefix syntax (`$host:`, `$lit:`, `$runtime:`) described in {ref}`env-vars-configuration`. Declare them at the top-level `env_vars:` section, at `evaluation.env_vars`, or per-task. Secret values are stored in a `.secrets.env` file alongside the generated `run.sh` and sourced at runtime — they never appear in the script itself.
@@ -58,6 +76,7 @@ The Local executor uses Docker volume mounts for data persistence:
 You can customize your local executor by specifying `extra_docker_args`.
 This parameter allows you to pass any flag to the `docker run` command that is executed by the NeMo Evaluator Launcher.
 You can use it to mount additional volumes, set environment variables or customize your network settings.
+`extra_docker_args` is ignored when `execution.use_docker: false`.
 
 For example, if you would like your job to use a specific docker network, you can specify:
 
diff --git a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/cli/run.py b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/cli/run.py
index 9b2ef628e..dcf5c8ae3 100644
--- a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/cli/run.py
+++ b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/cli/run.py
@@ -94,6 +94,13 @@ class Cmd:
             "If not specified, loads $PWD/.env if it exists."
         },
     )
+    no_docker: bool = field(
+        default=False,
+        alias=["--no-docker"],
+        metadata={
+            "help": "Run local executor tasks directly on host without launching Docker containers. Equivalent to setting execution.use_docker=false."
+        },
+    )
 
     def _parse_requested_tasks(self) -> list[str]:
         """Parse -t arguments into a list of task names.
@@ -207,6 +214,16 @@ def execute(self) -> None:
                 hydra_overrides=self.override,
             )
 
+        if self.no_docker:
+            if config.execution.type != "local":
+                raise ValueError(
+                    "--no-docker is only supported with execution.type=local."
+                )
+            is_struct = OmegaConf.is_struct(config)
+            OmegaConf.set_struct(config, False)
+            config.execution.use_docker = False
+            OmegaConf.set_struct(config, is_struct)
+
         # Apply task filtering if -t is specified
         if requested_tasks:
             config = filter_tasks(config, requested_tasks)
diff --git a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/common/helpers.py b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/common/helpers.py
index 6643fb26a..f2b4a6a4d 100644
--- a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/common/helpers.py
+++ b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/common/helpers.py
@@ -16,6 +16,7 @@
 import base64
 import datetime
 import os
+import shlex
 from dataclasses import dataclass
 from typing import Optional
 
@@ -61,8 +62,9 @@ def _str_to_echo_command(str_to_save: str, filename: str) -> CmdAndReadableComme
     debug_str = "\n".join(
         [f"# Contents of {filename}"] + ["# " + s for s in str_to_save.splitlines()]
     )
+    quoted_filename = shlex.quote(filename)
     return CmdAndReadableComment(
-        cmd=f'echo "{str_to_save_b64}" | base64 -d > {filename}', debug=debug_str
+        cmd=f'echo "{str_to_save_b64}" | base64 -d > {quoted_filename}', debug=debug_str
     )
 
 
@@ -167,6 +169,7 @@ def get_eval_factory_command(
     cfg: DictConfig,
     user_task_config: DictConfig,
     task_definition: dict,
+    output_dir: str = CONTAINER_RESULTS_DIR,
 ) -> CmdAndReadableComment:
     # This gets the eval_factory_config merged from both top-level and task-level.
     merged_nemo_evaluator_config = get_eval_factory_config(
@@ -214,7 +217,7 @@ def get_eval_factory_command(
     _set_nested_optionally_overriding(
         merged_nemo_evaluator_config,
         ["config", "output_dir"],
-        CONTAINER_RESULTS_DIR,
+        output_dir,
     )
     api_key_name = get_api_key_name(cfg)
     if api_key_name:
@@ -275,7 +278,7 @@ def get_eval_factory_command(
     if config_path:
         create_unresolved_config_cmd = _str_to_echo_command(
             open(config_path, "r").read(),
-            filename=f"{CONTAINER_RESULTS_DIR}/launcher_unresolved_config.yaml",
+            filename=f"{output_dir}/launcher_unresolved_config.yaml",
         )
         commands.append(create_unresolved_config_cmd.cmd)
         debug.append(create_unresolved_config_cmd.debug)
diff --git a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/configs/execution/local.yaml b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/configs/execution/local.yaml
index b025e5833..5f0a20f49 100644
--- a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/configs/execution/local.yaml
+++ b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/configs/execution/local.yaml
@@ -15,5 +15,6 @@
 #
 type: local
 output_dir: ???
+use_docker: true
 extra_docker_args: ""
 mode: sequential
diff --git a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/executors/local/executor.py b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/executors/local/executor.py
index 1d620e617..4057ebfb2 100644
--- a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/executors/local/executor.py
+++ b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/executors/local/executor.py
@@ -23,6 +23,7 @@
 import platform
 import shlex
 import shutil
+import signal
 import subprocess
 import time
 from typing import Iterator, List, Optional, Tuple, Union
@@ -67,6 +68,61 @@
 from nemo_evaluator_launcher.executors.registry import register_executor
 
 
+def _get_local_available_tasks() -> dict[str, set[str]]:
+    """Return locally installed NeMo Evaluator task names grouped by harness (raises RuntimeError if nemo-evaluator is missing)."""
+    try:  # import here so a missing nemo-evaluator becomes the actionable RuntimeError below
+        from nemo_evaluator.api import get_available_evaluations
+    except ImportError as e:
+        raise RuntimeError(
+            "execution.use_docker=false requires `nemo-evaluator` to be installed locally. "
+            "Install nemo-evaluator (with the harness/task wheels you need), or enable Docker execution."
+        ) from e
+
+    framework_task_mapping, _, _ = get_available_evaluations()  # only the first of the three returned values is used
+    return {
+        framework: set(tasks.keys())  # keep just the task names
+        for framework, tasks in framework_task_mapping.items()
+    }
+
+
+def _validate_task_available_locally(
+    *,
+    task_query: str,  # task name as requested; used only in the error messages
+    task_definition: dict,  # only the "harness" and "task" keys are read
+    available_tasks_by_harness: dict[str, set[str]],  # harness name -> installed task names
+) -> None:
+    """Raise ValueError if the task is not provided by locally installed NeMo Evaluator packages; return None otherwise."""
+    harness_name = str(task_definition.get("harness") or "")  # missing or None -> ""
+    task_name = str(task_definition.get("task") or "")  # missing or None -> ""
+
+    if harness_name:  # harness known: the task must exist in that specific harness
+        harness_tasks = available_tasks_by_harness.get(harness_name)
+        if harness_tasks is None:
+            available_harnesses = sorted(available_tasks_by_harness.keys())
+            raise ValueError(
+                f"Task '{task_query}' requires harness '{harness_name}', but this harness is not installed locally. "
+                f"Installed harnesses: {available_harnesses or ['<none>']}. "
+                "Install the corresponding NeMo Evaluator wheel, or run with Docker."
+            )
+        if task_name not in harness_tasks:
+            available_tasks = sorted(harness_tasks)
+            raise ValueError(
+                f"Task '{task_query}' is not available in installed harness '{harness_name}'. "
+                f"Available tasks in this harness: {available_tasks or ['<none>']}. "
+                "Install a wheel that contains this task, or run with Docker."
+            )
+        return
+
+    matching_harnesses = [  # no harness given: accept the task if any installed harness provides it
+        harness for harness, tasks in available_tasks_by_harness.items() if task_name in tasks
+    ]
+    if not matching_harnesses:
+        raise ValueError(
+            f"Task '{task_query}' is not available in locally installed NeMo Evaluator packages. "
+            "Install a wheel that contains this task, or run with Docker."
+        )
+
+
 @register_executor("local")
 class LocalExecutor(BaseExecutor):
     @classmethod
@@ -83,12 +139,21 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str:
         Raises:
             RuntimeError: If the run script fails.
         """
+        use_docker = bool(cfg.execution.get("use_docker", True))
+
         # Check if docker is available (skip in dry_run mode)
-        if not dry_run and shutil.which("docker") is None:
+        if use_docker and not dry_run and shutil.which("docker") is None:
             raise RuntimeError(
                 "Docker is not installed or not in PATH. "
                 "Please install Docker to run local evaluations."
             )
+        if not use_docker and cfg.deployment.type != "none":
+            raise ValueError(
+                "execution.use_docker=false is only supported with deployment.type=none."
+            )
+        local_available_tasks: dict[str, set[str]] | None = None
+        if not use_docker:
+            local_available_tasks = _get_local_available_tasks()
 
         # Generate invocation ID for this evaluation run
         invocation_id = generate_invocation_id()
@@ -136,6 +201,12 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str:
                 container=task.get("container"),
                 endpoint_type=task.get("endpoint_type"),
             )
+            if not use_docker:
+                _validate_task_available_locally(
+                    task_query=task.name,
+                    task_definition=task_definition,
+                    available_tasks_by_harness=local_available_tasks or {},
+                )
 
             # Track unlisted tasks for safeguard check
             if task_definition.get("is_unlisted", False):
@@ -194,11 +265,15 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str:
             dataset_mount_container = None
             dataset_env_var_value = None
             if "dataset_dir" in task:
-                dataset_mount_host = task["dataset_dir"]
-                # Get container mount path (default to /datasets if not specified)
-                dataset_mount_container = task.get("dataset_mount_path", "/datasets")
-                # Set NEMO_EVALUATOR_DATASET_DIR to the container mount path
-                dataset_env_var_value = dataset_mount_container
+                if use_docker:
+                    dataset_mount_host = task["dataset_dir"]
+                    # Get container mount path (default to /datasets if not specified)
+                    dataset_mount_container = task.get("dataset_mount_path", "/datasets")
+                    # Set NEMO_EVALUATOR_DATASET_DIR to the container mount path
+                    dataset_env_var_value = dataset_mount_container
+                else:
+                    # In no-docker mode, pass dataset_dir directly to local process.
+                    dataset_env_var_value = task["dataset_dir"]
 
             # Build env_groups for secrets file generation
             env_groups = {}
@@ -225,7 +300,12 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str:
             task_output_dir = output_dir / task.name
             task_output_dir.mkdir(parents=True, exist_ok=True)
             eval_factory_command_struct = get_eval_factory_command(
-                cfg, task, task_definition
+                cfg,
+                task,
+                task_definition,
+                output_dir=(
+                    "/results" if use_docker else str(task_output_dir / "artifacts")
+                ),
             )
             eval_factory_command = eval_factory_command_struct.cmd
             # The debug comment for placing into the script and easy debug. Reason
@@ -257,6 +337,7 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str:
                 "dataset_mount_host": dataset_mount_host,
                 "dataset_mount_container": dataset_mount_container,
                 "dataset_env_var_value": dataset_env_var_value,
+                "run_with_docker": use_docker,
             }
             evaluation_tasks.append(evaluation_task)
 
@@ -271,6 +352,7 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str:
                     evaluation_tasks=[evaluation_task],
                     auto_export_destinations=auto_export_destinations,
                     extra_docker_args=extra_docker_args,
+                    has_docker_tasks=use_docker,
                 ).rstrip("\n")
                 + "\n"
             )
@@ -288,6 +370,7 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str:
                     evaluation_tasks=evaluation_tasks,
                     auto_export_destinations=auto_export_destinations,
                     extra_docker_args=extra_docker_args,
+                    has_docker_tasks=use_docker,
                 ).rstrip("\n")
                 + "\n"
             )
@@ -386,8 +469,13 @@ def execute_eval(cls, cfg: DictConfig, dry_run: bool = False) -> str:
                     executor="local",
                     data={
                         "output_dir": str(evaluation_task["output_dir"]),
-                        "container": evaluation_task["client_container_name"],
+                        "container": (
+                            evaluation_task["client_container_name"]
+                            if use_docker
+                            else ""
+                        ),
                         "eval_image": evaluation_task["eval_image"],
+                        "use_docker": use_docker,
                     },
                     config=OmegaConf.to_object(cfg),
                 )
@@ -711,33 +799,46 @@ def kill_job(job_id: str) -> None:
                 f"Job {job_id} is not a local job (executor: {job_data.executor})"
             )
 
-        # Get container name from database
-        container_name = job_data.data.get("container")
-        if not container_name:
-            raise ValueError(f"No container name found for job {job_id}")
+        use_docker = bool(job_data.data.get("use_docker", True))
+        output_dir = pathlib.Path(job_data.data.get("output_dir", ""))
+        container_name = job_data.data.get("container") or ""
 
         killed_something = False
 
-        # First, try to stop the Docker container if it's running
-        result = subprocess.run(
-            shlex.split(f"docker stop {container_name}"),
-            capture_output=True,
-            text=True,
-            timeout=30,
-        )
-        if result.returncode == 0:
-            killed_something = True
-        # Don't raise error if container doesn't exist (might be still pulling)
-
-        # Find and kill Docker processes for this container
-        result = subprocess.run(
-            shlex.split(f"pkill -f 'docker run.*{container_name}'"),
-            capture_output=True,
-            text=True,
-            timeout=10,
-        )
-        if result.returncode == 0:
-            killed_something = True
+        # Try to stop script process group if a pid file is present.
+        pid_file = output_dir / "logs" / "stage.pid"
+        if pid_file.exists():
+            try:
+                pid = int(pid_file.read_text().strip())
+                if hasattr(os, "killpg"):
+                    os.killpg(pid, signal.SIGTERM)
+                else:
+                    os.kill(pid, signal.SIGTERM)
+                killed_something = True
+            except (OSError, ValueError):
+                pass
+
+        if use_docker and container_name:
+            # First, try to stop the Docker container if it's running
+            result = subprocess.run(
+                shlex.split(f"docker stop {container_name}"),
+                capture_output=True,
+                text=True,
+                timeout=30,
+            )
+            if result.returncode == 0:
+                killed_something = True
+            # Don't raise error if container doesn't exist (might be still pulling)
+
+            # Find and kill Docker processes for this container
+            result = subprocess.run(
+                shlex.split(f"pkill -f 'docker run.*{container_name}'"),
+                capture_output=True,
+                text=True,
+                timeout=10,
+            )
+            if result.returncode == 0:
+                killed_something = True
 
         # If we successfully killed something, mark as killed
         if killed_something:
@@ -758,7 +859,13 @@ def kill_job(job_id: str) -> None:
         # Use common helper to get informative error message based on job status
         current_status = status_list[0].state if status_list else None
         error_msg = LocalExecutor.get_kill_failure_message(
-            job_id, f"container: {container_name}", current_status
+            job_id,
+            (
+                f"container: {container_name}"
+                if container_name
+                else f"pid_file: {pid_file}"
+            ),
+            current_status,
         )
         raise RuntimeError(error_msg)
 
diff --git a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/executors/local/run.template.sh b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/executors/local/run.template.sh
index 76f048cd9..d5575c54c 100644
--- a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/executors/local/run.template.sh
+++ b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/executors/local/run.template.sh
@@ -14,8 +14,10 @@
 # limitations under the License.
 
 
-# check if docker exists
+# check if docker exists when any task uses docker
+{% if has_docker_tasks %}
 command -v docker >/dev/null 2>&1 || { echo 'docker not found'; exit 1; }
+{% endif %}
 
 # Initialize: remove killed jobs file from previous runs
 script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
@@ -57,14 +59,17 @@ else
     # Debug contents of the eval factory command's config
     {{ task.eval_factory_command_debug_comment | indent(4) }}
 
-    # Docker run with eval factory command
+    # Execute evaluation task
     (
         {% if task.secrets_env_content -%}
-        # Source secrets (scoped to subshell); re-exports happen before each docker run
+        # Source secrets (scoped to subshell)
         source "$task_dir/.secrets.env"
         {% endif -%}
 
+        echo "$$" > "$logs_dir/stage.pid"
+        trap 'rm -f "$logs_dir/stage.pid"' EXIT
         echo "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "$logs_dir/stage.running"
+        {% if task.run_with_docker %}
         {% if task.deployment %}
         {% if task.deployment_reexport_cmd -%}
         # Re-export deployment env vars to original names
@@ -126,9 +131,19 @@ else
         echo "Container completed successfully" >&2;
         exit 0;
       ' > "$logs_dir/client_stdout.log" 2>&1
-    exit_code=$?
+        {% else %}
+        {% if task.eval_reexport_cmd -%}
+        # Re-export eval env vars to original names
+        {{ task.eval_reexport_cmd }}
+        {% endif -%}
+        {% if task.dataset_env_var_value -%}
+        export NEMO_EVALUATOR_DATASET_DIR="{{ task.dataset_env_var_value }}"
+        {% endif -%}
+        {{ task.eval_factory_command }} > "$logs_dir/client_stdout.log" 2>&1
+        {% endif %}
+        exit_code=$?
 
-    {% if task.deployment %}
+    {% if task.run_with_docker and task.deployment %}
     # Stop the server
     docker stop $SERVER_CONTAINER_NAME 2>/dev/null || true
     {% endif %}
diff --git a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/resources/config_templates/execution/local.yaml b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/resources/config_templates/execution/local.yaml
index a1f4cc946..9c3812f4c 100644
--- a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/resources/config_templates/execution/local.yaml
+++ b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/resources/config_templates/execution/local.yaml
@@ -3,3 +3,4 @@ defaults:
 
 execution:
   output_dir: nel-results
+  use_docker: true
diff --git a/packages/nemo-evaluator-launcher/tests/unit_tests/test_cli_integration.py b/packages/nemo-evaluator-launcher/tests/unit_tests/test_cli_integration.py
index 5a2aa7972..2dc8f810d 100644
--- a/packages/nemo-evaluator-launcher/tests/unit_tests/test_cli_integration.py
+++ b/packages/nemo-evaluator-launcher/tests/unit_tests/test_cli_integration.py
@@ -819,3 +819,49 @@ def test_config_parameter_with_various_extensions(
             run_cmd.execute()
             call_kwargs = mock_compose.call_args.kwargs
             assert call_kwargs["config_name"] == "test_config"
+
+    def test_no_docker_flag_sets_execution_use_docker_false(
+        self, mock_execdb, mock_api_endpoint_check, mock_print
+    ):
+        config_dict = {
+            "deployment": {"type": "none"},
+            "execution": {"type": "local", "output_dir": "/tmp/test_output"},  # no use_docker key: the flag must add it
+            "target": {
+                "api_endpoint": {"api_key_name": "test_key", "model_id": "test_model"}
+            },
+            "evaluation": {"tasks": [{"name": "test_task_1"}]},
+        }
+
+        with (
+            patch("nemo_evaluator_launcher.api.types.hydra.compose") as mock_compose,
+            patch("nemo_evaluator_launcher.api.functional.run_eval") as mock_run_eval,
+        ):
+            mock_compose.return_value = OmegaConf.create(config_dict)
+            mock_run_eval.return_value = None
+
+            run_cmd = RunCmd(no_docker=True, dry_run=True)
+            run_cmd.execute()
+
+            called_cfg = mock_run_eval.call_args.args[0]  # the config is the first positional argument of run_eval
+            assert called_cfg.execution.use_docker is False
+
+    def test_no_docker_flag_rejects_non_local_executor(
+        self, mock_execdb, mock_api_endpoint_check, mock_print
+    ):
+        config_dict = {
+            "deployment": {"type": "none"},
+            "execution": {"type": "dummy", "output_dir": "/tmp/test_output"},  # non-local type: --no-docker must be rejected
+            "target": {
+                "api_endpoint": {"api_key_name": "test_key", "model_id": "test_model"}
+            },
+            "evaluation": {"tasks": [{"name": "test_task_1"}]},
+        }
+
+        with patch("nemo_evaluator_launcher.api.types.hydra.compose") as mock_compose:
+            mock_compose.return_value = OmegaConf.create(config_dict)
+
+            with pytest.raises(
+                ValueError,
+                match="--no-docker is only supported with execution.type=local",
+            ):
+                RunCmd(no_docker=True, dry_run=True).execute()
diff --git a/packages/nemo-evaluator-launcher/tests/unit_tests/test_get_eval_factory_command.py b/packages/nemo-evaluator-launcher/tests/unit_tests/test_get_eval_factory_command.py
index 1812d270e..1f3424178 100644
--- a/packages/nemo-evaluator-launcher/tests/unit_tests/test_get_eval_factory_command.py
+++ b/packages/nemo-evaluator-launcher/tests/unit_tests/test_get_eval_factory_command.py
@@ -93,3 +93,34 @@ def test_get_eval_factory_command_basic(monkeypatch):
 
     # The command to run eval is present
     assert "&& $cmd run_eval --run_config config_ef.yaml" in result.cmd
+
+
+def test_get_eval_factory_command_custom_output_dir():
+    cfg = OmegaConf.create(
+        {
+            "evaluation": {"nemo_evaluator_config": {"config": {}}},
+            "deployment": {"type": "none"},
+            "target": {
+                "api_endpoint": {
+                    "url": "https://example.test/api",
+                    "model_id": "model-123",
+                    "api_key_name": "MY_API_KEY",
+                }
+            },
+        }
+    )
+    user_task_config = OmegaConf.create({"nemo_evaluator_config": {"config": {}}})
+    task_definition = {"endpoint_type": "chat", "task": "my_task"}
+
+    result = get_eval_factory_command(
+        cfg,
+        user_task_config,
+        task_definition,
+        output_dir="/tmp/nel/results",  # explicit value; must end up in the generated run config
+    )
+
+    b64 = _extract_b64_from_echo_cmd(result.cmd)  # helper is not visible in this hunk; presumably defined earlier in this test module
+    decoded_yaml = base64.b64decode(b64.encode("utf-8")).decode("utf-8")
+    merged = yaml.safe_load(decoded_yaml)  # parse the embedded config back into a dict
+
+    assert merged["config"]["output_dir"] == "/tmp/nel/results"
diff --git a/packages/nemo-evaluator-launcher/tests/unit_tests/test_local_executor.py b/packages/nemo-evaluator-launcher/tests/unit_tests/test_local_executor.py
index 103164d01..00e82305e 100644
--- a/packages/nemo-evaluator-launcher/tests/unit_tests/test_local_executor.py
+++ b/packages/nemo-evaluator-launcher/tests/unit_tests/test_local_executor.py
@@ -261,6 +261,134 @@ def mock_get_task_def_side_effect(*_args, **kwargs):
                 if env_var in os.environ:
                     del os.environ[env_var]
 
+    def test_execute_eval_dry_run_no_docker_generates_host_commands(
+        self, sample_config, mock_tasks_mapping
+    ):
+        """When execution.use_docker=false, generated scripts should run on host."""
+        sample_config.execution.use_docker = False
+        os.environ["TEST_API_KEY"] = "test_key_value"
+        os.environ["GLOBAL_VALUE"] = "global_env_value"
+        os.environ["TASK_VALUE"] = "task_env_value"
+
+        try:
+            with (
+                patch(
+                    "nemo_evaluator_launcher.executors.local.executor.load_tasks_mapping"
+                ) as mock_load_mapping,
+                patch(
+                    "nemo_evaluator_launcher.executors.local.executor._get_local_available_tasks"
+                ) as mock_get_local_tasks,
+                patch(
+                    "nemo_evaluator_launcher.executors.local.executor.get_task_definition_for_job"
+                ) as mock_get_task_def,
+                patch(
+                    "nemo_evaluator_launcher.executors.local.executor.get_eval_factory_command"
+                ) as mock_get_command,
+                patch("builtins.print"),
+            ):
+                mock_load_mapping.return_value = mock_tasks_mapping
+                mock_get_local_tasks.return_value = {  # stub the locally installed harness -> tasks mapping
+                    "lm-eval": {"test_task_1"},
+                    "helm": {"test_task_2"},
+                }
+
+                def mock_get_task_def_side_effect(*_args, **kwargs):  # look the task up by name in base_mapping, ignoring harness
+                    task_name = kwargs.get("task_query")
+                    mapping = kwargs.get("base_mapping", {})
+                    for (_harness, name), definition in mapping.items():
+                        if name == task_name:
+                            return definition
+                    raise KeyError(f"Task {task_name} not found")
+
+                mock_get_task_def.side_effect = mock_get_task_def_side_effect
+                from nemo_evaluator_launcher.common.helpers import CmdAndReadableComment
+
+                mock_get_command.return_value = CmdAndReadableComment(
+                    cmd="nemo-evaluator run_eval --run_config config_ef.yaml",
+                    debug="# Host command",
+                )
+
+                invocation_id = LocalExecutor.execute_eval(sample_config, dry_run=True)
+
+                output_base = pathlib.Path(sample_config.execution.output_dir)
+                output_dir = None
+                for item in output_base.iterdir():  # find the run directory whose name ends with the invocation id
+                    if item.is_dir() and item.name.endswith(f"-{invocation_id}"):
+                        output_dir = item
+                        break
+                assert output_dir is not None
+
+                run_script = (output_dir / "test_task_1" / "run.sh").read_text()
+                assert "docker run" not in run_script
+                assert "docker not found" not in run_script
+                assert (
+                    'nemo-evaluator run_eval --run_config config_ef.yaml > "$logs_dir/client_stdout.log" 2>&1'
+                    in run_script
+                )
+
+                for call in mock_get_command.call_args_list:  # every call must pass an output_dir ending in /artifacts
+                    assert call.kwargs["output_dir"].endswith("/artifacts")
+        finally:
+            for env_var in ["TEST_API_KEY", "GLOBAL_VALUE", "TASK_VALUE"]:
+                if env_var in os.environ:
+                    del os.environ[env_var]
+
+    def test_execute_eval_no_docker_with_deployment_raises(self, sample_config):
+        """No-docker mode only supports deployment.type=none."""
+        sample_config.execution.use_docker = False
+        sample_config.deployment.type = "vllm"  # any deployment type other than "none"
+
+        with pytest.raises(
+            ValueError,
+            match="execution.use_docker=false is only supported with deployment.type=none",
+        ):
+            LocalExecutor.execute_eval(sample_config, dry_run=True)
+
+    def test_execute_eval_no_docker_missing_local_task_raises(
+        self, sample_config, mock_tasks_mapping
+    ):
+        sample_config.execution.use_docker = False
+        os.environ["TEST_API_KEY"] = "test_key_value"
+        os.environ["GLOBAL_VALUE"] = "global_env_value"
+        os.environ["TASK_VALUE"] = "task_env_value"
+
+        try:
+            with (
+                patch(
+                    "nemo_evaluator_launcher.executors.local.executor.load_tasks_mapping"
+                ) as mock_load_mapping,
+                patch(
+                    "nemo_evaluator_launcher.executors.local.executor._get_local_available_tasks"
+                ) as mock_get_local_tasks,
+                patch(
+                    "nemo_evaluator_launcher.executors.local.executor.get_task_definition_for_job"
+                ) as mock_get_task_def,
+            ):
+                mock_load_mapping.return_value = mock_tasks_mapping
+                mock_get_local_tasks.return_value = {  # harness present but presumably without the requested task (fixture not visible here)
+                    "lm-eval": {"some_other_task"},
+                    "helm": {"test_task_2"},
+                }
+
+                def mock_get_task_def_side_effect(*_args, **kwargs):  # look the task up by name in base_mapping, ignoring harness
+                    task_name = kwargs.get("task_query")
+                    mapping = kwargs.get("base_mapping", {})
+                    for (_harness, name), definition in mapping.items():
+                        if name == task_name:
+                            return definition
+                    raise KeyError(f"Task {task_name} not found")
+
+                mock_get_task_def.side_effect = mock_get_task_def_side_effect
+
+                with pytest.raises(
+                    ValueError, match="not available in installed harness"
+                ):
+                    LocalExecutor.execute_eval(sample_config, dry_run=True)
+        finally:
+            for env_var in ["TEST_API_KEY", "GLOBAL_VALUE", "TASK_VALUE"]:
+                if env_var in os.environ:
+                    del os.environ[env_var]
+
 
 class TestLocalExecutorGetStatus:
     """Test LocalExecutor get_status functionality."""