Commit aa53ebe

[prf/dec][doc] Update javadoc to reflect unified batched prefill-decode plan
1 parent 97f2d8b commit aa53ebe

3 files changed

Lines changed: 6 additions & 8 deletions


src/main/java/org/beehive/gpullama3/tornadovm/layers/type/fp16/decode/LlamaFP16FFNLayersDecode.java

Lines changed: 2 additions & 6 deletions
@@ -9,7 +9,8 @@
 import uk.ac.manchester.tornado.api.enums.DataTransferMode;

 /**
- * Decode-path FFN layers for the Phase 4 unified plan.
+ * Decode FFN layers of the unified batched prefill-decode plan
+ * ({@link org.beehive.gpullama3.tornadovm.TornadoVMMasterPlanWithBatchPrefillDecode}).
  *
  * <p>Overrides data-transfer declarations so that all cross-graph boundaries use
  * the explicit-source form of {@code consumeFromDevice}. The no-arg form (used by
@@ -20,11 +21,6 @@
  * never propagated — causing either a null-pointer crash or a silent re-upload
  * from host (zeros), corrupting the hidden state and KV cache.</p>
  *
- * <p>Two boundaries are fixed here:</p>
- * <ul>
- * <li>{@code wrapX}: via {@link #predecessorGraphName} hook in the base class.</li>
- * <li>All other consumed objects: via the {@link #configureLayerDataTransfers} override.</li>
- * </ul>
  */
 public class LlamaFP16FFNLayersDecode extends LlamaFP16FFNLayers {
     public LlamaFP16FFNLayersDecode(String taskGraph, LlamaState state,
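The distinction the javadoc draws between the two forms of {@code consumeFromDevice} can be sketched roughly as follows. This is an illustrative, non-compilable fragment against TornadoVM's fluent TaskGraph API; the graph name "prevGraph" and the buffer variable are hypothetical, not taken from the commit:

    // Illustrative sketch only — names are hypothetical.
    TaskGraph ffnGraph = new TaskGraph("decodeFfn")
            // Fragile: the no-arg form resolves the producing graph
            // implicitly, which can fail across separately built
            // prefill/decode graphs (device pointer not propagated).
            // .consumeFromDevice(wrapX)
            // Robust: naming the source graph explicitly makes the
            // device pointer resolve against that graph's object state.
            .consumeFromDevice("prevGraph", wrapX);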

src/main/java/org/beehive/gpullama3/tornadovm/layers/type/fp16/decode/LogitsFP16LayerDecode.java

Lines changed: 2 additions & 1 deletion
@@ -8,7 +8,8 @@
 import uk.ac.manchester.tornado.api.TaskGraph;

 /**
- * Logits layer for the unified prefill-decode plan (Phase 4).
+ * Logits layer of the unified batched prefill-decode plan
+ * ({@link org.beehive.gpullama3.tornadovm.TornadoVMMasterPlanWithBatchPrefillDecode}).
  *
  * <p>Extends {@link LogitsFP16Layer} with KV-cache pass-through so the device
  * pointers for {@code wrapKeyCache} and {@code wrapValueCache} survive the

src/main/java/org/beehive/gpullama3/tornadovm/layers/type/fp16/prefill/LlamaFP16LayersBatchPrefill.java

Lines changed: 2 additions & 1 deletion
@@ -16,7 +16,8 @@
 import java.util.stream.IntStream;

 /**
- * Builds per-layer batch prefill TaskGraphs for Phase 4 GPU batched prefill.
+ * Prefill FFN layers with batching for the unified batched prefill-decode plan
+ * ({@link org.beehive.gpullama3.tornadovm.TornadoVMMasterPlanWithBatchPrefillDecode}).
  *
  * <p>One {@link ImmutableTaskGraph} per transformer layer, each processing
  * {@code batchSize} tokens simultaneously via {@link TransformerBatchPrefillKernels}.</p>
