Skip to content

Commit 1cbe491

Browse files
[prf/dec][cleanup] Remove unused debug logs and commented-out code from TornadoVM execution paths.
1 parent 869c67d commit 1cbe491

3 files changed

Lines changed: 0 additions & 38 deletions

File tree

src/main/java/org/beehive/gpullama3/tornadovm/TornadoVMMasterPlanStandard.java

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,6 @@ public TornadoVMMasterPlanStandard(State state, Model model) {
3838
this.state = state;
3939
this.model = model;
4040
this.config = model.configuration();
41-
4241
this.executionPlan = createExecutionPlan();
4342

4443
if (ENABLE_TORNADOVM_INIT_TIME) {
@@ -63,12 +62,6 @@ public TornadoVMMasterPlanStandard(State state, Model model) {
6362
}
6463
}
6564

66-
// @Override
67-
// public GenericLayerPlanner createPlanner() {
68-
// GGMLType weightType = model.weights().getWeightType();
69-
// return QuantizationPlannerFactory.create(weightType, state, model);
70-
// }
71-
7265
/**
7366
* Creates the {@link TornadoExecutionPlan} for *simple/standard* single-token forward pass.
7467
*/

src/main/java/org/beehive/gpullama3/tornadovm/TornadoVMMasterPlanWithBatchPrefillDecode.java

Lines changed: 0 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,6 @@ public class TornadoVMMasterPlanWithBatchPrefillDecode implements TornadoVMMaste
8181
this.config = (LlamaConfiguration) model.configuration();
8282
this.batchSize = PREFILL_BATCH_SIZE;
8383
this.N = config.numberOfLayers();
84-
8584
this.gridScheduler = new GridScheduler();
8685
this.executionPlan = createExecutionPlan();
8786

@@ -129,23 +128,12 @@ private TaskGraph buildBatchPrefillActivationGraph(KernelContext ctx) {
129128
* not forwarded in interpreter (non-CUDA-graph) mode.</p>
130129
*/
131130
private TaskGraph buildDecodeActivationGraph(KernelContext ctx, String lastBatchLayerID) {
132-
// System.out.println("lastBatchLayerID = " + lastBatchLayerID);
133-
// System.out.println("[buildDecodeActivationGraph] state.wrapX = " + state.wrapX.toString());
134-
// System.out.println("[buildDecodeActivationGraph] state.wrapKeyCache = " + state.wrapKeyCache.toString());
135-
// System.out.println("[buildDecodeActivationGraph] state.wrapValueCache = " + state.wrapValueCache.toString());
136131
return new TaskGraph("decodeActivationUpdate")
137132
.consumeFromDevice(lastBatchLayerID, state.wrapKeyCache, state.wrapValueCache) // KV pass-through
138-
//.transferToDevice(DataTransferMode.FIRST_EXECUTION, ctx, state.wrapX, debugKV)
139-
//.transferToDevice(DataTransferMode.FIRST_EXECUTION, ctx, state.wrapX)
140133
.transferToDevice(DataTransferMode.EVERY_EXECUTION, state.embeddingX)
141134
.task("updateX",
142135
TransformerComputeKernels::convertFP16toFP32,
143136
ctx, (HalfFloatArray) state.embeddingX, state.wrapX)
144-
// // DEBUG: snapshot first 8 elements of wrapKeyCache and wrapX for host-side probe
145-
// .task("dbgKV",
146-
// TransformerComputeKernels::dbgCopyFirst8,
147-
// state.wrapKeyCache, debugKV)
148-
// .transferToHost(DataTransferMode.EVERY_EXECUTION, state.wrapX, debugKV)
149137
// wrapX persisted for decode layer 0; wrapKeyCache/wrapValueCache
150138
// re-persisted so updatePersistedObjectState() propagates the device
151139
// pointer to decode layer 0's consumeFromDevice without CUDA graphs.
@@ -210,7 +198,6 @@ public void forceCopyInReadOnlyData() {
210198
state.batchStartPosHolder.init(0);
211199

212200
for (int i = 0; i <= logitsIdx(); i++) {
213-
//System.out.println(i + " " + executionPlan.withGraph(i).toString());
214201
var g = executionPlan.withGraph(i).withGridScheduler(gridScheduler);
215202
if (CUDA_GRAPHS) g.withCUDAGraph();
216203
g.execute();
@@ -252,7 +239,6 @@ public void tornadoVMForwardBatchPrefill(int[] tokenIds, int startPos, Model mod
252239
if (CUDA_GRAPHS) batchLayer.withCUDAGraph();
253240
batchLayer.execute();
254241
}
255-
//System.err.println("[DEBUG] last batch layer done, about to return from prefill");
256242
// Logits skipped — not needed for prefill positions.
257243
}
258244

@@ -280,16 +266,7 @@ public FloatArray tornadoVMForwardDecode(int token, int position, Model model) {
280266
// Graph N+1: decode activation
281267
var decodeAct = executionPlan.withGraph(decodeActivationIdx()).withGridScheduler(gridScheduler);
282268
if (CUDA_GRAPHS) decodeAct.withCUDAGraph();
283-
//System.err.println("[DEBUG] about to execute decode activation (graph " + decodeActivationIdx() + "--)");
284269
decodeAct.execute();
285-
// DEBUG: print first 4 of wrapX (should be non-zero FP32 embedding) and
286-
// first 4 of debugKV (should be non-zero after batch prefill wrote the KV cache)
287-
// if (position <= 290) {
288-
// System.err.printf("[DBG pos=%d] wrapX[0..3] = %.4f %.4f %.4f %.4f%n",
289-
// position, state.wrapX.get(0), state.wrapX.get(1), state.wrapX.get(2), state.wrapX.get(3));
290-
// System.err.printf("[DBG pos=%d] debugKV[0..3]= %.4f %.4f %.4f %.4f%n",
291-
// position, debugKV.get(0), debugKV.get(1), debugKV.get(2), debugKV.get(3));
292-
// }
293270

294271
// Graphs N+2..2N+1: decode transformer layers
295272
for (int l = 0; l < N; l++) {
@@ -321,11 +298,4 @@ public void freeTornadoExecutionPlan() {
321298
executionPlan.freeDeviceMemory();
322299
}
323300

324-
// ── Inner class: decode layer 0 with consumeFromDevice for KV cache ───────
325-
// moved to package
326-
//
327-
// private static final class LlamaFP16FFNLayersForUnifiedDecode extends LlamaFP16FFNLayers {
328-
//
329-
//
330-
// }
331301
}

src/main/java/org/beehive/gpullama3/tornadovm/TornadoVMMasterPlanWithPrefillDecode.java

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,6 @@ public TornadoVMMasterPlanWithPrefillDecode(State state, Model model) {
5050
this.state = state;
5151
this.model = model;
5252
this.config = model.configuration();
53-
5453
this.executionPlan = createExecutionPlan();
5554

5655
if (ENABLE_TORNADOVM_INIT_TIME) {

0 commit comments

Comments
 (0)