Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
201 changes: 187 additions & 14 deletions src/hooks/hook_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,12 +152,12 @@ def _build_hook_env(
for skill-declared hooks; empty for everything else).
* ``CLAUDE_ENV_FILE`` — per-fire ephemeral env file path. Set ONLY for
the three lifecycle events that benefit from env propagation
(``SessionStart``, ``Setup``, ``CwdChanged``). For other events:
empty string. Per N4: this WI sets the path; the
sourcing-and-applying loop (read the file back and apply exports to
subsequent shells in the session) is a separate follow-up ticket.
TODO(ch12-followup): ticket #<TBD> covers the env-file source/apply
cycle.
(``SessionStart``, ``Setup``, ``CwdChanged``) and only for
POSIX-shell hooks (TS parity: PowerShell hooks are skipped — they
would write ``$env:FOO = ...`` syntax a POSIX source can't
consume). After the hook completes successfully the executor
evaluates the file and applies the exports to subsequent Bash
tool commands (#281, ``_apply_env_file``).
"""
event_name = stdin_data.get("hook_event", "")
workspace_root = ""
Expand All @@ -166,7 +166,13 @@ def _build_hook_env(
if wr is not None:
workspace_root = str(wr)

env_file = _env_file_for_event(event_name)
# PowerShell hooks never get the env file (TS parity, hooks.ts
# ``!isPowerShell``): they'd write ``$env:FOO = ...`` syntax that a
# POSIX source can't consume.
if hook.shell == "powershell":
env_file = ""
else:
env_file = _env_file_for_event(event_name)

return {
**os.environ,
Expand All @@ -186,19 +192,158 @@ def _env_file_for_event(event_name: str) -> str:
"empty" as "no env propagation requested."

The file is per-fire ephemeral: a unique path under
``~/.clawcodex/hook-env/<event>.<pid>.<nanos>``. This WI does NOT
create the file or read it back; it only computes the path. Sourcing is
a follow-up.
``~/.clawcodex/hook-env/<event>.<pid>.<nanos>``. The parent directory
is created here so a hook's ``echo ... > "$CLAUDE_ENV_FILE"`` redirect
can succeed; the executor reads the file back and applies the exports
after the hook completes (#281, ``_apply_env_file``).
"""
# TS also includes FileChanged (hooks.ts:1097); add it here when the
# file-watcher event is ported.
if event_name not in ("SessionStart", "Setup", "CwdChanged"):
return ""
home = os.path.expanduser("~")
env_dir = os.path.join(home, ".clawcodex", "hook-env")
try:
os.makedirs(env_dir, exist_ok=True)
except OSError:
return "" # unwritable home — skip env propagation for this fire
return os.path.join(
home, ".clawcodex", "hook-env",
f"{event_name}.{os.getpid()}.{time.time_ns()}",
env_dir, f"{event_name}.{os.getpid()}.{time.time_ns()}",
)


# Size cap (256 KiB) for a hook-written env file: ``_apply_env_file`` logs a
# warning and ignores any file larger than this instead of evaluating it.
_MAX_ENV_FILE_BYTES = 256 * 1024

# Noise the sourcing shell itself sets — never part of the hook's intent.
# ``_shell_eval_env_exports`` drops these keys from the delta it returns.
_SHELL_NOISE_KEYS = frozenset({"_", "SHLVL", "PWD", "OLDPWD"})


def _shell_eval_env_exports(env_file: str) -> dict[str, str]:
    """Evaluate the env file with a real POSIX shell and return the env
    delta it produced (#281).

    Sourcing — rather than parsing ``KEY=VAL`` lines — is load-bearing:
    the canonical use case is ``export PATH="$HOME/bin:$PATH"`` (venv /
    conda activation), and a literal parse would store the unexpanded
    ``$HOME/bin:$PATH`` string, corrupting PATH for every later spawn.
    The shell sees (and the diff baseline includes) the current session
    view, so a later hook can prepend to a PATH an earlier hook already
    extended. Same trust boundary as the hook itself, which already ran
    arbitrary shell.

    Args:
        env_file: Path of the per-fire env file the hook may have written.

    Returns:
        The variables whose value after sourcing differs from the baseline
        (``os.environ`` overlaid with the session hook env), minus the
        shell-noise keys. Empty on Windows, on any failure to run the
        shell, or when sourcing exits non-zero.

    Known limits: ``unset FOO`` is ignored (the diff only observes
    additions/changes); the eval is a deliberately synchronous
    ``subprocess.run`` inside the async hook path — ~10ms, once per fire
    of three rare lifecycle events.
    """
    import subprocess

    if os.name == "nt":
        return {}  # POSIX-shell contract; PowerShell hooks don't get the var
    from .session_env import get_session_hook_env

    base = {**os.environ, **get_session_hook_env()}
    try:
        # ``set -a``: auto-export plain ``KEY=VAL`` assignments too — the
        # documented contract accepts both with and without ``export``.
        proc = subprocess.run(
            [
                "/bin/sh",
                "-c",
                'set -a; . "$1" >/dev/null 2>&1 || exit 9; env -0',
                "sh",
                env_file,
            ],
            # Never let the sourced file read the host's stdin: a stray
            # ``read`` would otherwise block on (or steal input from) the
            # inherited TTY until the timeout fires.
            stdin=subprocess.DEVNULL,
            capture_output=True,
            env=base,
            timeout=10,
        )
    except Exception:
        logger.debug("failed to evaluate %s", env_file, exc_info=True)
        return {}
    if proc.returncode != 0:
        logger.warning(
            "CLAUDE_ENV_FILE %s failed to source (exit %s); ignoring",
            env_file,
            proc.returncode,
        )
        return {}
    exports: dict[str, str] = {}
    for entry in proc.stdout.split(b"\0"):
        if not entry:
            continue
        raw_key, sep, raw_val = entry.partition(b"=")
        if not sep:
            continue
        # Decode exactly the way ``os.environ`` does (filesystem encoding +
        # surrogateescape). A lossy ``errors="replace"`` decode would make
        # every untouched variable holding non-UTF-8 bytes compare unequal
        # to the baseline and be exported in a corrupted form.
        key = os.fsdecode(raw_key)
        if key in _SHELL_NOISE_KEYS:
            continue
        value = os.fsdecode(raw_val)
        if base.get(key) != value:
            exports[key] = value
    return exports


def _discard_env_file(env_file: str) -> None:
    """Delete the per-fire env file without evaluating it.

    Used on the fail / timeout / abort paths, where whatever an
    unsuccessful hook managed to write is treated as untrusted. An empty
    path is a no-op, and a file that is already gone (or cannot be
    removed) is silently tolerated.
    """
    if env_file:
        try:
            os.unlink(env_file)
        except OSError:
            # Best-effort cleanup: nothing useful to do if removal fails.
            pass


def _apply_env_file(env_file: str, event: str) -> None:
    """Run the source-and-apply cycle for ``CLAUDE_ENV_FILE`` (#281).

    The per-fire env file that a SessionStart/Setup/CwdChanged hook may
    have written is evaluated, the resulting exports are merged into the
    session hook env for ``event`` (consumed by the Bash tool at spawn —
    NOT the host process env; TS parity: ``sessionEnvironment.ts`` injects
    into bash commands only), and the ephemeral file is removed.

    Everything here is fail-soft: env propagation is a convenience and
    must never be the reason the hook pipeline fails. A missing file, an
    oversized file, or any exception simply results in nothing applied.

    Deliberate divergence from TS: TS sources whatever the file holds
    regardless of how the hook exited; here only a hook that completed
    successfully gets its exports applied (the caller gates on
    ``exit_code == 0``) — partial writes from a failed/timed-out hook
    are discarded.

    Args:
        env_file: Path of the per-fire env file; empty means "nothing to do".
        event: Hook event name whose session bucket receives the exports.
    """
    if not env_file:
        return
    try:
        if not os.path.isfile(env_file):
            return
        size_in_bytes = os.path.getsize(env_file)
        if size_in_bytes > _MAX_ENV_FILE_BYTES:
            logger.warning(
                "CLAUDE_ENV_FILE %s exceeds %d bytes; ignoring",
                env_file,
                _MAX_ENV_FILE_BYTES,
            )
            return
        delta = _shell_eval_env_exports(env_file)
        if not delta:
            return
        from .session_env import merge_into_bucket

        merge_into_bucket(event, delta)
        # Only variable names are logged, never their values.
        logger.debug(
            "applied %d env export(s) from %s hook env file: %s",
            len(delta),
            event,
            sorted(delta),
        )
    except Exception:
        logger.debug("failed to apply %s", env_file, exc_info=True)
    finally:
        # The file is single-use: remove it on every path out of the try.
        try:
            os.unlink(env_file)
        except OSError:
            pass


async def _execute_command_hook(
hook: HookConfig,
stdin_data: dict[str, Any],
Expand All @@ -213,9 +358,17 @@ async def _execute_command_hook(
effective_timeout = (hook.timeout or timeout_ms) / 1000.0
start_time = time.monotonic()

env_file = ""
try:
stdin_json = json.dumps(stdin_data, default=str)

# Built once per fire: the env dict carries the unique
# CLAUDE_ENV_FILE path that the source-and-apply step below must
# read back (#281) — calling _build_hook_env twice would mint two
# different paths.
hook_env = _build_hook_env(hook, stdin_data, tool_use_context)
env_file = hook_env.get("CLAUDE_ENV_FILE", "")

# Round-2 / Ch12 — per-hook shell selection. ``shell="powershell"``
# spawns ``pwsh`` with explicit argv and skips the bash-shell path.
# ``None`` / ``"bash"`` keeps the historical ``create_subprocess_shell``
Expand Down Expand Up @@ -248,7 +401,7 @@ async def _execute_command_hook(
stdin=asyncio.subprocess.PIPE,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
env=_build_hook_env(hook, stdin_data, tool_use_context),
env=hook_env,
)
else:
# Default (bash on POSIX via /bin/sh, the historical path).
Expand All @@ -260,7 +413,7 @@ async def _execute_command_hook(
stdin=asyncio.subprocess.PIPE,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
env=_build_hook_env(hook, stdin_data, tool_use_context),
env=hook_env,
)

try:
Expand Down Expand Up @@ -313,6 +466,12 @@ async def _execute_command_hook(
command=command,
)

# Source-and-apply (#281): only a hook that completed successfully
# gets its env exports applied — a timed-out, aborted, or failed
# hook's partial writes are discarded by the finally below
# (deliberate divergence from TS, which sources unconditionally).
_apply_env_file(env_file, stdin_data.get("hook_event", ""))

result = HookResult(
exit_code=0,
stdout=stdout,
Expand Down Expand Up @@ -359,6 +518,11 @@ async def _execute_command_hook(
duration_ms=duration_ms,
command=command,
)
finally:
# Ephemeral-file cleanup on every exit path. A no-op after a
# successful apply (which already unlinked); on timeout / abort /
# non-zero exit it discards unapplied partial writes.
_discard_env_file(env_file)


async def _run_hooks_for_event(
Expand All @@ -369,6 +533,15 @@ async def _run_hooks_for_event(
abort_signal: Any | None = None,
timeout_ms: int = TOOL_HOOK_EXECUTION_TIMEOUT_MS,
) -> AsyncGenerator[dict[str, Any], None]:
# #281: each fire of an env-propagating event REPLACES that event's
# session exports — for CwdChanged this is the TS clearing semantics
# (clearCwdEnvFiles): the previous directory's per-project env never
# leaks into the next one, even when the new cwd defines no hooks.
if event in ("SessionStart", "Setup", "CwdChanged"):
from .session_env import clear_event_bucket

clear_event_bucket(event)

# WI-0.2 — workspace-trust gate. Skip non-policy hooks while the workspace
# is untrusted. The per-hook policy check happens below since policy-source
# identification is per-HookConfig.
Expand Down
50 changes: 50 additions & 0 deletions src/hooks/session_env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""Session-scoped env exports written by lifecycle hooks (#281).

Holds the evaluated exports from ``CLAUDE_ENV_FILE`` writes by
SessionStart / Setup / CwdChanged hooks, bucketed per event. The Bash
tool merges :func:`get_session_hook_env` over ``os.environ`` at spawn —
scoping the contract to "subsequent Bash tool commands" (TS parity:
``sessionEnvironment.ts`` injects into bash commands only, never the
host process env).

Bucketing per event gives CwdChanged the TS clearing semantics for
free: each fire replaces that event's bucket, so per-project exports
from the previous directory don't leak into the next one
(TS ``clearCwdEnvFiles``).

Leaf module: importable from the Bash tool without dragging in the hook
executor stack.
"""

from __future__ import annotations

# Events whose hooks may export env, ordered from lowest to highest merge
# precedence (TS HOOK_ENV_PRIORITY, sessionEnvironment.ts:146-151:
# setup < sessionstart < cwdchanged — SessionStart overrides Setup on key
# conflict).
_ENV_EVENTS = ("Setup", "SessionStart", "CwdChanged")

# Per-event exports: event name -> {variable name: value}.
_buckets: dict[str, dict[str, str]] = {}


def clear_event_bucket(event: str) -> None:
    """Forget everything ``event`` has exported so far.

    Called at the start of each fire so that a re-fire (e.g. a cwd change)
    replaces the event's exports instead of accumulating them. Unknown or
    never-populated events are a no-op.
    """
    if event in _buckets:
        del _buckets[event]


def merge_into_bucket(event: str, exports: dict[str, str]) -> None:
    """Add ``exports`` to the bucket of ``event``.

    Nothing is stored when ``exports`` is empty or ``event`` is not one of
    the env-propagating events. Keys already present in the bucket are
    overwritten by the new values.
    """
    if exports and event in _ENV_EVENTS:
        bucket = _buckets.get(event)
        if bucket is None:
            bucket = _buckets[event] = {}
        bucket.update(exports)


def get_session_hook_env() -> dict[str, str]:
    """Return a fresh merged view of all buckets, later lifecycle events
    winning on key conflicts."""
    return {
        name: value
        for event in _ENV_EVENTS
        for name, value in _buckets.get(event, {}).items()
    }


def reset_session_hook_env_for_testing() -> None:
    """Empty every bucket in place (test helper)."""
    _buckets.clear()
7 changes: 7 additions & 0 deletions src/hooks/session_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@ async def run_session_start_hooks(
reg = registry or get_global_hook_registry()
hooks = await reg.get_hooks_for_event(SESSION_START_EVENT)

# #281: each fire REPLACES the event's session exports (the same
# invariant _run_hooks_for_event enforces for its dispatch path) —
# a SessionStart re-fire (resume, /clear) must not accumulate.
from .session_env import clear_event_bucket

clear_event_bucket(SESSION_START_EVENT)

results: list[dict[str, Any]] = []
for hook in hooks:
stdin_data = {
Expand Down
17 changes: 17 additions & 0 deletions src/tool_system/tools/bash/background.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,30 @@ def spawn_background_bash(
# ``stdin=DEVNULL`` mirrors the foreground bash path: prevents background
# commands that read fd 0 from blocking on a TTY inherited from clawcodex's
# REPL (see bash_tool.py:_run_bash_with_abort for the same reasoning).
# The session hook env (#281) is merged exactly like the foreground path.
popen_env = None
try:
from src.hooks.session_env import get_session_hook_env

session_env = get_session_hook_env()
if session_env:
import os as _os

popen_env = {**_os.environ, **session_env}
except Exception:
import logging

logging.getLogger(__name__).debug(
"session hook env merge failed", exc_info=True
)
proc = subprocess.Popen(
["bash", "-lc", wrapped],
cwd=str(cwd),
stdin=subprocess.DEVNULL,
stdout=output_handle,
stderr=subprocess.STDOUT,
start_new_session=True,
env=popen_env,
)

started_at = time.time()
Expand Down
16 changes: 16 additions & 0 deletions src/tool_system/tools/bash/bash_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,22 @@ def _run_bash_with_abort(
"stderr": subprocess.PIPE,
"text": True,
}
# #281: exports written by SessionStart/Setup/CwdChanged hooks via
# CLAUDE_ENV_FILE apply to subsequent Bash tool commands (and ONLY
# here — the host process env is untouched; TS sessionEnvironment.ts
# scopes the contract to bash commands the same way).
try:
from src.hooks.session_env import get_session_hook_env

_session_env = get_session_hook_env()
if _session_env:
popen_kwargs["env"] = {**_os_mod.environ, **_session_env}
except Exception:
import logging

logging.getLogger(__name__).debug(
"session hook env merge failed", exc_info=True
)
if _sys_mod.platform == "win32":
popen_kwargs["creationflags"] = getattr(
subprocess, "CREATE_NEW_PROCESS_GROUP", 0
Expand Down
Loading