Commit d74a228 (1 parent: 11ecb11)

[prf/dec] Fix predecessor graph naming and device data consumption for non-CUDA Graphs

1 file changed, 6 additions & 3 deletions:
src/main/java/org/beehive/gpullama3/tornadovm/layers/type/fp16/LlamaFP16FFNLayers.java
@@ -280,7 +280,7 @@ protected TaskGraph createFFNLayerTaskGraph(int layerIndex) {
      * </ul>
      */
     protected String predecessorGraphName(int layerIndex) {
-        return null;
+        return (layerIndex == 0) ? "activationUpdate" : "layer_" + (layerIndex - 1);
     }

     protected TaskGraph configureLayerDataTransfers(TaskGraph unifiedLayer, int layerIndex) {
@@ -302,8 +302,11 @@ protected TaskGraph configureLayerDataTransfers(TaskGraph unifiedLayer, int laye
                     // Attention & FFN buffers
                     state.wrapAtt, state.wrapHb, state.wrapXbFP16);
         } else {
-            // Subsequent layers: Consume data already on device from previous layer
-            unifiedLayer.consumeFromDevice(
+            // Subsequent layers: consume from the previous layer graph by name.
+            // The no-arg consumeFromDevice form uses the current graph's own name as source key,
+            // which never matches the predecessor in interpreter mode (no CUDA graphs).
+            String pred = "layer_" + (layerIndex - 1);
+            unifiedLayer.consumeFromDevice(pred,
                     // Kernel context
                     context,
                     // Intermediate buffers
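For context, a minimal sketch of the chaining pattern this commit establishes: each layer's TaskGraph names its predecessor explicitly and consumes that graph's device-resident buffers by name, because (per the comment in the diff) the no-arg consumeFromDevice form keys the lookup on the current graph's own name and so never matches the predecessor in interpreter mode. The TaskGraph constructor and the consumeFromDevice(String, Object...) call shape follow the diff above; buildLayerGraph, its parameters, and LayerChainSketch are hypothetical stand-ins, not code from this repository.

// Hedged sketch, assuming TornadoVM's TaskGraph API as used in the diff above.
// buildLayerGraph and its Object parameters are illustrative only.
import uk.ac.manchester.tornado.api.TaskGraph;

class LayerChainSketch {

    // Same logic as the fixed predecessorGraphName: layer 0 reads the buffers
    // left on the device by the "activationUpdate" graph; layer i reads layer_(i-1)'s.
    static String predecessorGraphName(int layerIndex) {
        return (layerIndex == 0) ? "activationUpdate" : "layer_" + (layerIndex - 1);
    }

    // Builds one layer graph that consumes its predecessor's device data by name,
    // rather than via the no-arg form that keys the lookup on this graph's own name.
    static TaskGraph buildLayerGraph(int layerIndex, Object context, Object buffer) {
        TaskGraph graph = new TaskGraph("layer_" + layerIndex);
        graph.consumeFromDevice(predecessorGraphName(layerIndex), context, buffer);
        // The real layer would then append task(...) kernels and leave its outputs
        // on the device for the next graph in the chain to consume.
        return graph;
    }
}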
