Widen RunBatch/ReconstructionLoss for structured outputs

Oliver Clive-Griffin · claude · Oliver Clive-Griffin · commit 1da9e13b1d89 · 2026-05-27T13:07:35.000Z
Lets experiments package per-batch context (padding masks, labels, MSA
aux features) into output dataclasses instead of smuggling them through
tensor shapes. Surfaced while stress-testing the abstractions against
ESM2, Carbon, and GPN-MSA bio models.

- RunBatch: (model, batch) -> Tensor → (model, batch) -> Any
- ReconstructionLoss args: (pred, target) → (output, target_output); types Any
- OutputWithCache.output, MetricContext.target_out: Tensor → Any
- (sum, n_elements) return pair kept — needed for evals where the number of masked positions varies per batch
- Notes the tied-embedding gap in make_components (deferred)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/param_decomp/batch_and_loss_fns.py b/param_decomp/batch_and_loss_fns.py
@@ -11,19 +11,38 @@
 
 
 class RunBatch(Protocol):
-    """Callable that runs one batch through `model` and returns the output tensor."""
+    """Callable that runs one batch through `model` and returns its output.
 
-    def __call__(self, model: nn.Module, batch: Any) -> Tensor: ...
+    The output type is experiment-defined (`Any`) — typically a tensor of logits, but
+    may be a dataclass / dict carrying additional fields (attention masks, hidden
+    states, labels) that the experiment's `ReconstructionLoss` consumes. The same
+    `RunBatch` is invoked on both the frozen target and the decomposed model, so the
+    two `output` values it produces share a structure.
+    """
 
+    def __call__(self, model: nn.Module, batch: Any) -> Any: ...
 
-class ReconstructionLoss(Protocol):
-    """Callable that compares `pred` against `target` and returns `(sum, n_elements)`.
 
-    The first entry is the unreduced sum of per-element losses; the second is the count
-    it summed over. Callers reduce `sum / n_elements` to a mean as needed.
+class ReconstructionLoss(Protocol):
+    """Compare a decomposed-model `output` against the frozen-target `target_output`.
+
+    Both are whatever the experiment's `RunBatch` returns. The return pair
+    `(sum, n_elements)` is the unreduced sum of per-element losses and the count it
+    summed over (or sum-of-weights for weighted/masked losses); callers reduce
+    `sum / n_elements` to a mean as needed.
+
+    Per-batch context the loss needs (padding masks, MLM-masked positions,
+    per-channel weights, labels) rides on the `output` / `target_output` structure
+    — experiments are responsible for packaging it inside `RunBatch`. Static aux
+    state (e.g. a k-mer→nucleotide lookup table) lives in a closure / partial /
+    `__call__`-bearing class — the Protocol stays minimal.
     """
 
-    def __call__(self, pred: Tensor, target: Tensor) -> tuple[Float[Tensor, ""], int]: ...
+    def __call__(
+        self,
+        output: Any,
+        target_output: Any,
+    ) -> tuple[Float[Tensor, ""], int]: ...
 
 
 def move_batch_to_device(batch: Any, device: str | torch.device) -> Any:
diff --git a/param_decomp/component_model.py b/param_decomp/component_model.py
@@ -28,10 +28,12 @@ class OutputWithCache(NamedTuple):
     """Forward output paired with per-module cached activations.
 
     Cache keys are target-module paths (or `f"{path}_{kind}"` for component-acts entries);
-    contents depend on the `cache_type` requested.
+    contents depend on the `cache_type` requested. `output` is whatever `RunBatch`
+    returns — typically a tensor but may be a dataclass / dict for experiments that
+    package per-batch context (masks, labels) for `ReconstructionLoss`.
     """
 
-    output: Tensor
+    output: Any
     cache: dict[str, Tensor]
 
 
@@ -168,10 +170,10 @@ def __call__(
         batch: Any,
         mask_infos: dict[str, ComponentsMaskInfo] | None = None,
         cache_type: Literal["none"] = "none",
-    ) -> Tensor: ...
+    ) -> Any: ...
 
     @override
-    def __call__(self, *args: Any, **kwargs: Any) -> Tensor | OutputWithCache:
+    def __call__(self, *args: Any, **kwargs: Any) -> Any | OutputWithCache:
         return super().__call__(*args, **kwargs)
 
     @override
@@ -180,7 +182,7 @@ def forward(
         batch: Any,
         mask_infos: dict[str, ComponentsMaskInfo] | None = None,
         cache_type: Literal["component_acts", "input", "output", "none"] = "none",
-    ) -> Tensor | OutputWithCache:
+    ) -> Any | OutputWithCache:
         """Run the target model with optional component replacement and/or caching.
 
         With no extra args, this is just a forward pass through the frozen target model.
@@ -220,7 +222,7 @@ def forward(
             )
 
         with self._attach_forward_hooks(hooks):
-            out: Tensor = self._run_batch(self.target_model, batch)
+            out: Any = self._run_batch(self.target_model, batch)
 
         match cache_type:
             case "input" | "output" | "component_acts":
diff --git a/param_decomp/components.py b/param_decomp/components.py
@@ -274,6 +274,12 @@ def make_components(
         Dict keyed by the same submodule paths, mapping to a `Components` instance whose
         weights have been initialised but not yet trained.
     """
+    # NOTE: storage-tied weights (e.g. `tie_word_embeddings=True` on Llama/ESM/GPT-2,
+    # where `embed_tokens.weight is lm_head.weight`) are not detected here — decomposing
+    # both sides of a tie produces two independent `Components` instances that silently
+    # learn the same target. Deferred: we don't currently decompose embeddings, so this
+    # is dormant. Fix would be to detect shared `weight.data_ptr()` and either share one
+    # `Components` instance or auto-add to `tied_weights`.
     out: dict[str, Components] = {}
     for path, C in module_to_c.items():
         target_module = target_model.get_submodule(path)
diff --git a/param_decomp/metrics/ci_masked_recon.py b/param_decomp/metrics/ci_masked_recon.py
@@ -20,7 +20,7 @@ class CIMaskedReconLossConfig(LossMetricConfig):
 def _ci_masked_recon_loss_update(
     model: ComponentModel,
     batch: Any,
-    target_out: Tensor,
+    target_out: Any,
     ci: dict[str, Float[Tensor, "... C"]],
     reconstruction_loss: ReconstructionLoss,
 ) -> tuple[Float[Tensor, ""], int]:
@@ -32,7 +32,7 @@ def _ci_masked_recon_loss_update(
 def ci_masked_recon_loss(
     model: ComponentModel,
     batch: Any,
-    target_out: Tensor,
+    target_out: Any,
     ci: dict[str, Float[Tensor, "... C"]],
     reconstruction_loss: ReconstructionLoss,
 ) -> Float[Tensor, ""]:
diff --git a/param_decomp/metrics/ci_masked_recon_layerwise.py b/param_decomp/metrics/ci_masked_recon_layerwise.py
@@ -21,7 +21,7 @@ class CIMaskedReconLayerwiseLossConfig(LossMetricConfig):
 def _ci_masked_recon_layerwise_loss_update(
     model: ComponentModel,
     batch: Any,
-    target_out: Tensor,
+    target_out: Any,
     ci: dict[str, Float[Tensor, "... C"]],
     reconstruction_loss: ReconstructionLoss,
 ) -> tuple[Float[Tensor, ""], int]:
@@ -39,7 +39,7 @@ def _ci_masked_recon_layerwise_loss_update(
 def ci_masked_recon_layerwise_loss(
     model: ComponentModel,
     batch: Any,
-    target_out: Tensor,
+    target_out: Any,
     ci: dict[str, Float[Tensor, "... C"]],
     reconstruction_loss: ReconstructionLoss,
 ) -> Float[Tensor, ""]:
diff --git a/param_decomp/metrics/ci_masked_recon_subset.py b/param_decomp/metrics/ci_masked_recon_subset.py
@@ -30,7 +30,7 @@ class CIMaskedReconSubsetLossConfig(LossMetricConfig):
 def _ci_masked_recon_subset_loss_update(
     model: ComponentModel,
     batch: Any,
-    target_out: Tensor,
+    target_out: Any,
     ci: dict[str, Float[Tensor, "... C"]],
     router: Router,
     reconstruction_loss: ReconstructionLoss,
@@ -51,7 +51,7 @@ def _ci_masked_recon_subset_loss_update(
 def ci_masked_recon_subset_loss(
     model: ComponentModel,
     batch: Any,
-    target_out: Tensor,
+    target_out: Any,
     ci: dict[str, Float[Tensor, "... C"]],
     routing: SubsetRoutingType,
     reconstruction_loss: ReconstructionLoss,
diff --git a/param_decomp/metrics/context.py b/param_decomp/metrics/context.py
@@ -24,7 +24,9 @@ class MetricContext:
 
     model: ComponentModel
     batch: Any
-    target_out: Tensor
+    target_out: (
+        Any  # Whatever `RunBatch` returns — Tensor in simple cases, dataclass / dict otherwise.
+    )
     pre_weight_acts: dict[str, Float[Tensor, "..."]]
     ci: CIOutputs
     weight_deltas: dict[str, Float[Tensor, "d_out d_in"]]
diff --git a/param_decomp/metrics/persistent_pgd_state.py b/param_decomp/metrics/persistent_pgd_state.py
@@ -9,7 +9,7 @@
 from typing import Annotated, Any, Literal, override
 
 import torch
-from jaxtyping import Float, Int
+from jaxtyping import Float
 from pydantic import Field, NonNegativeFloat, PositiveInt
 from torch import Tensor
 from torch.distributed import ReduceOp
@@ -311,8 +311,8 @@ def load_state_dict(self, state: dict[str, Any]) -> None:
     def warmup(
         self,
         model: ComponentModel,
-        batch: Int[Tensor, "..."] | Float[Tensor, "..."],
-        target_out: Float[Tensor, "... vocab"],
+        batch: Any,
+        target_out: Any,
         ci: dict[str, Float[Tensor, "... C"]],
         weight_deltas: dict[str, Float[Tensor, "d_out d_in"]] | None,
     ) -> None:
@@ -331,8 +331,8 @@ def warmup(
     def compute_recon_sum_and_n(
         self,
         model: ComponentModel,
-        batch: Int[Tensor, "..."] | Float[Tensor, "..."],
-        target_out: Float[Tensor, "... vocab"],
+        batch: Any,
+        target_out: Any,
         ci: dict[str, Float[Tensor, "... C"]],
         weight_deltas: dict[str, Float[Tensor, "d_out d_in"]] | None,
         router: Router | None = None,
@@ -420,8 +420,8 @@ def _compute_ppgd_recon_loss(
     model: ComponentModel,
     ppgd_sources: PPGDSources,
     reconstruction_loss: ReconstructionLoss,
-    batch: Int[Tensor, "..."] | Float[Tensor, "..."],
-    target_out: Float[Tensor, "... vocab"],
+    batch: Any,
+    target_out: Any,
     ci: dict[str, Float[Tensor, "... C"]],
     weight_deltas: dict[str, Float[Tensor, "d_out d_in"]] | None,
     routing_masks: RoutingMasks,
@@ -431,5 +431,5 @@ def _compute_ppgd_recon_loss(
 
     mask_infos = get_ppgd_mask_infos(ci, weight_deltas, ppgd_sources, routing_masks, batch_dims)
     out = model(batch, mask_infos=mask_infos)
-    loss, n_examples = reconstruction_loss(pred=out, target=target_out)
+    loss, n_examples = reconstruction_loss(output=out, target_output=target_out)
     return loss, n_examples
diff --git a/param_decomp/metrics/pgd_masked_recon.py b/param_decomp/metrics/pgd_masked_recon.py
@@ -22,7 +22,7 @@ def pgd_recon_loss(
     *,
     model: ComponentModel,
     batch: Any,
-    target_out: Tensor,
+    target_out: Any,
     ci: dict[str, Float[Tensor, "... C"]],
     weight_deltas: dict[str, Float[Tensor, "d_out d_in"]] | None,
     pgd_config: PGDConfig,
diff --git a/param_decomp/metrics/pgd_utils.py b/param_decomp/metrics/pgd_utils.py
@@ -129,7 +129,7 @@ def _forward_with_adv_sources(
     ci: dict[str, Float[Tensor, "... C"]],
     weight_deltas: dict[str, Float[Tensor, "d_out d_in"]] | None,
     routing_masks: RoutingMasks,
-    target_out: Tensor,
+    target_out: Any,
     batch_dims: tuple[int, ...],
     reconstruction_loss: ReconstructionLoss,
 ) -> tuple[Float[Tensor, ""], int]:
@@ -149,7 +149,7 @@ def pgd_masked_recon_loss_update(
     batch: Any,
     ci: dict[str, Float[Tensor, "... C"]],
     weight_deltas: dict[str, Float[Tensor, "d_out d_in"]] | None,
-    target_out: Tensor,
+    target_out: Any,
     router: Router,
     pgd_config: PGDConfig,
     reconstruction_loss: ReconstructionLoss,
diff --git a/param_decomp/metrics/stochastic_recon.py b/param_decomp/metrics/stochastic_recon.py
@@ -23,7 +23,7 @@ def _stochastic_recon_loss_update(
     sampling: SamplingType,
     n_mask_samples: int,
     batch: Any,
-    target_out: Tensor,
+    target_out: Any,
     ci: dict[str, Float[Tensor, "... C"]],
     weight_deltas: dict[str, Float[Tensor, "d_out d_in"]] | None,
     reconstruction_loss: ReconstructionLoss,
@@ -50,7 +50,7 @@ def stochastic_recon_loss(
     sampling: SamplingType,
     n_mask_samples: int,
     batch: Any,
-    target_out: Tensor,
+    target_out: Any,
     ci: dict[str, Float[Tensor, "... C"]],
     weight_deltas: dict[str, Float[Tensor, "d_out d_in"]] | None,
     reconstruction_loss: ReconstructionLoss,
diff --git a/param_decomp/metrics/stochastic_recon_layerwise.py b/param_decomp/metrics/stochastic_recon_layerwise.py
@@ -23,7 +23,7 @@ def _stochastic_recon_layerwise_loss_update(
     sampling: SamplingType,
     n_mask_samples: int,
     batch: Any,
-    target_out: Tensor,
+    target_out: Any,
     ci: dict[str, Float[Tensor, "... C"]],
     weight_deltas: dict[str, Float[Tensor, "d_out d_in"]] | None,
     reconstruction_loss: ReconstructionLoss,
@@ -54,7 +54,7 @@ def stochastic_recon_layerwise_loss(
     sampling: SamplingType,
     n_mask_samples: int,
     batch: Any,
-    target_out: Tensor,
+    target_out: Any,
     ci: dict[str, Float[Tensor, "... C"]],
     weight_deltas: dict[str, Float[Tensor, "d_out d_in"]] | None,
     reconstruction_loss: ReconstructionLoss,
diff --git a/param_decomp/metrics/stochastic_recon_subset.py b/param_decomp/metrics/stochastic_recon_subset.py
@@ -34,7 +34,7 @@ def _stochastic_recon_subset_loss_update(
     sampling: SamplingType,
     n_mask_samples: int,
     batch: Any,
-    target_out: Tensor,
+    target_out: Any,
     ci: dict[str, Float[Tensor, "... C"]],
     weight_deltas: dict[str, Float[Tensor, "d_out d_in"]] | None,
     router: Router,
@@ -65,7 +65,7 @@ def stochastic_recon_subset_loss(
     sampling: SamplingType,
     n_mask_samples: int,
     batch: Any,
-    target_out: Tensor,
+    target_out: Any,
     ci: dict[str, Float[Tensor, "... C"]],
     weight_deltas: dict[str, Float[Tensor, "d_out d_in"]] | None,
     routing: SubsetRoutingType,
diff --git a/param_decomp/metrics/unmasked_recon.py b/param_decomp/metrics/unmasked_recon.py
@@ -21,7 +21,7 @@ class UnmaskedReconLossConfig(LossMetricConfig):
 def _unmasked_recon_loss_update(
     model: ComponentModel,
     batch: Any,
-    target_out: Tensor,
+    target_out: Any,
     reconstruction_loss: ReconstructionLoss,
 ) -> tuple[Float[Tensor, ""], int]:
     device = get_obj_device(model)
diff --git a/param_decomp/tests/test_optimize.py b/param_decomp/tests/test_optimize.py
@@ -44,9 +44,9 @@ def run_batch_passthrough(model: nn.Module, batch: Any) -> Tensor:
     return out
 
 
-def recon_loss_mse(pred: Tensor, target: Tensor) -> tuple[Tensor, int]:
-    assert pred.shape == target.shape
-    return ((pred - target) ** 2).sum(), pred.numel()
+def recon_loss_mse(output: Tensor, target_output: Tensor) -> tuple[Tensor, int]:
+    assert output.shape == target_output.shape
+    return ((output - target_output) ** 2).sum(), output.numel()
 
 
 class CaptureSink:
diff --git a/param_decomp_lab/batch_and_loss_fns.py b/param_decomp_lab/batch_and_loss_fns.py
@@ -40,31 +40,40 @@ def make_run_batch(output_extract: int | str | None) -> RunBatch:
 
 
 def recon_loss_mse(
-    pred: Float[Tensor, "... d"],
-    target: Float[Tensor, "... d"],
+    output: Float[Tensor, "... d"],
+    target_output: Float[Tensor, "... d"],
 ) -> tuple[Float[Tensor, ""], int]:
-    """Elementwise MSE recon loss returning `(sum_squared_errors, n_elements)`."""
-    assert pred.shape == target.shape
-    squared_errors = (pred - target) ** 2
-    return squared_errors.sum(), pred.numel()
+    """Elementwise MSE recon loss returning `(sum_squared_errors, n_elements)`.
+
+    Treats both args as bare tensors. Experiments that need padding-masked / weighted
+    MSE should write a custom `ReconstructionLoss` whose `output` is a dataclass
+    carrying the mask.
+    """
+    assert output.shape == target_output.shape
+    squared_errors = (output - target_output) ** 2
+    return squared_errors.sum(), output.numel()
 
 
 def calc_kl_divergence_lm(
     pred: Float[Tensor, "... vocab"],
     target: Float[Tensor, "... vocab"],
 ) -> Float[Tensor, ""]:
     """Mean per-position KL between logits tensors. `pred = Q`, `target = P`."""
-    sum_kl, n_positions = recon_loss_kl(pred=pred, target=target)
+    sum_kl, n_positions = recon_loss_kl(output=pred, target_output=target)
     return sum_kl / n_positions
 
 
 def recon_loss_kl(
-    pred: Float[Tensor, "... vocab"],
-    target: Float[Tensor, "... vocab"],
+    output: Float[Tensor, "... vocab"],
+    target_output: Float[Tensor, "... vocab"],
 ) -> tuple[Float[Tensor, ""], int]:
-    """KL recon loss returning `(sum_per_position_kl, n_positions)`. `pred = Q`, `target = P`."""
-    assert pred.shape == target.shape
-    log_q = torch.log_softmax(pred, dim=-1)  # log Q
-    p = torch.softmax(target, dim=-1)  # P
-    n_positions = pred.numel() // pred.shape[-1]
+    """KL recon loss returning `(sum_per_position_kl, n_positions)`.
+
+    `output` is treated as Q (decomposed-model logits), `target_output` as P-source
+    (target-model logits, softmaxed inside).
+    """
+    assert output.shape == target_output.shape
+    log_q = torch.log_softmax(output, dim=-1)  # log Q
+    p = torch.softmax(target_output, dim=-1)  # P
+    n_positions = output.numel() // output.shape[-1]
     return F.kl_div(log_q, p, reduction="sum"), n_positions
diff --git a/param_decomp_lab/tests/test_batch_and_loss_fns.py b/param_decomp_lab/tests/test_batch_and_loss_fns.py
diff --git a/param_decomp_lab/tests/test_resumption.py b/param_decomp_lab/tests/test_resumption.py