Skip to content

Commit 83998cc

Browse files
Refactor TornadoVM initialization metrics tracking to include finer granularity across plan creation, JIT, and weight transfer stages
1 parent 45ada2f commit 83998cc

4 files changed

Lines changed: 28 additions & 38 deletions

File tree

src/main/java/org/beehive/gpullama3/auxiliary/RunMetrics.java

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,9 @@ public final class RunMetrics {
2929
private boolean hasPrefillPhase;
3030

3131
// ── TornadoVM-specific metrics (nanoseconds) ──────────────────────────────
32-
private long tornadoCompileDurationNs;
33-
private long tornadoWarmupDurationNs;
32+
private long tornadoPlanCreationNs;
33+
private long tornadoJitNs;
34+
private long readOnlyWeightsCopyInNs;
3435

3536
// ── Singleton ─────────────────────────────────────────────────────────────
3637
private static final RunMetrics INSTANCE = new RunMetrics();
@@ -47,12 +48,14 @@ public static void setLoadDuration(long ns) {
4748
/**
4849
* Records TornadoVM-specific initialisation durations.
4950
*
50-
* @param compileNs plan-graph construction + JIT compilation ({@code withPreCompilation()})
51-
* @param warmupNs first-execution weight upload ({@code forceCopyInReadOnlyData()})
51+
* @param planCreationNs task-graph construction ({@code createExecutionPlan()})
52+
* @param jitNs JIT compilation ({@code withPreCompilation()})
53+
* @param weightCopyNs first-execution weight upload ({@code forceCopyInReadOnlyData()})
5254
*/
53-
public static void setTornadoMetrics(long compileNs, long warmupNs) {
54-
INSTANCE.tornadoCompileDurationNs = compileNs;
55-
INSTANCE.tornadoWarmupDurationNs = warmupNs;
55+
public static void setTornadoMetrics(long planCreationNs, long jitNs, long weightCopyNs) {
56+
INSTANCE.tornadoPlanCreationNs = planCreationNs;
57+
INSTANCE.tornadoJitNs = jitNs;
58+
INSTANCE.readOnlyWeightsCopyInNs = weightCopyNs;
5659
}
5760

5861
/**
@@ -85,13 +88,13 @@ public static void setHasPrefillPhase(boolean value) {
8588

8689
// ── Output ────────────────────────────────────────────────────────────────
8790

88-
/** Prints throughput metrics to {@code stderr}. */
91+
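/** Prints throughput metrics to {@code stderr}; also prints the TornadoVM init timings when the {@code llama.EnableTimingForTornadoVMInit} system property is {@code true} and a plan-creation time was recorded. */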
8992
public static void printMetrics() {
9093
RunMetrics m = INSTANCE;
9194

92-
int totalTokens = m.promptEvalCount + m.evalCount;
93-
double totalSecs = m.totalDurationNs / 1e9;
94-
double totalRate = totalSecs > 0 ? totalTokens / totalSecs : 0;
95+
int totalTokens = m.promptEvalCount + m.evalCount;
96+
double totalSecs = m.totalDurationNs / 1e9;
97+
double totalRate = totalSecs > 0 ? totalTokens / totalSecs : 0;
9598

9699
System.err.println();
97100
System.err.println("==== Performance Metrics ====");
@@ -113,6 +116,17 @@ public static void printMetrics() {
113116
System.err.printf("achieved tok/s: %.2f. Tokens: %d, seconds: %.2f%n",
114117
totalRate, totalTokens, totalSecs);
115118
}
119+
120+
if (Boolean.parseBoolean(System.getProperty("llama.EnableTimingForTornadoVMInit", "false"))
121+
&& m.tornadoPlanCreationNs > 0) {
122+
System.err.printf(
123+
"Plan creation: %.2f ms%n" +
124+
"JIT compilation: %.2f ms%n" +
125+
"Read-only weights Copy-in: %.2f ms%n",
126+
m.tornadoPlanCreationNs / 1_000_000.0,
127+
m.tornadoJitNs / 1_000_000.0,
128+
m.readOnlyWeightsCopyInNs / 1_000_000.0);
129+
}
116130
System.err.println();
117131
}
118132
}

src/main/java/org/beehive/gpullama3/tornadovm/TornadoVMMasterPlanStandard.java

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,7 @@ public TornadoVMMasterPlanStandard(State state, Model model) {
4747
forceCopyInReadOnlyData();
4848
long copyTime = System.nanoTime();
4949

50-
RunMetrics.setTornadoMetrics(warmupTime - startTime, copyTime - warmupTime);
51-
52-
if (ENABLE_TORNADOVM_INIT_TIME) {
53-
System.err.printf("TornadoVM GPU standard execution plan creation: %.2f ms\n", (planCreationTime - startTime) / 1_000_000.0);
54-
System.err.printf("Java to GPU JIT compiler warmup: %.2f ms\n", (warmupTime - planCreationTime) / 1_000_000.0);
55-
System.err.printf("Transfer read-only weights to GPU: %.2f ms\n", (copyTime - warmupTime) / 1_000_000.0);
56-
System.err.printf("Finished TornadoVM initialization...\n \n");
57-
}
50+
RunMetrics.setTornadoMetrics(planCreationTime - startTime, warmupTime - planCreationTime, copyTime - warmupTime);
5851
}
5952

6053
/**

src/main/java/org/beehive/gpullama3/tornadovm/TornadoVMMasterPlanWithBatchPrefillDecode.java

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -97,14 +97,7 @@ public class TornadoVMMasterPlanWithBatchPrefillDecode implements TornadoVMMaste
9797
forceCopyInReadOnlyData();
9898
long copyTime = System.nanoTime();
9999

100-
RunMetrics.setTornadoMetrics(warmupTime - startTime, copyTime - warmupTime);
101-
102-
if (ENABLE_TORNADOVM_INIT_TIME) {
103-
System.err.printf("TornadoVM GPU batched prefill/decode execution plan creation: %.2f ms\n", (planCreationTime - startTime) / 1_000_000.0);
104-
System.err.printf("Java to GPU JIT compiler warmup: %.2f ms\n", (warmupTime - planCreationTime) / 1_000_000.0);
105-
System.err.printf("Transfer read-only weights to GPU: %.2f ms\n", (copyTime - warmupTime) / 1_000_000.0);
106-
System.err.printf("Finished TornadoVM initialization...\n \n");
107-
}
100+
RunMetrics.setTornadoMetrics(planCreationTime - startTime, warmupTime - planCreationTime, copyTime - warmupTime);
108101
}
109102

110103
// ── Batch Prefill Activation graphs ─────────────────────────────────────────────────────

src/main/java/org/beehive/gpullama3/tornadovm/TornadoVMMasterPlanWithPrefillDecode.java

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -85,17 +85,7 @@ public class TornadoVMMasterPlanWithPrefillDecode implements TornadoVMMasterPlan
8585
forceCopyInReadOnlyData();
8686
long copyTime = System.nanoTime();
8787

88-
RunMetrics.setTornadoMetrics(warmupTime - startTime, copyTime - warmupTime);
89-
90-
if (ENABLE_TORNADOVM_INIT_TIME) {
91-
System.err.printf("TornadoVM GPU single-token prefill/decode execution plan creation: %.2f ms\n",
92-
(planCreationTime - startTime) / 1_000_000.0);
93-
System.err.printf("Java to GPU JIT compiler warmup: %.2f ms\n",
94-
(warmupTime - planCreationTime) / 1_000_000.0);
95-
System.err.printf("Transfer read-only weights to GPU: %.2f ms\n",
96-
(copyTime - warmupTime) / 1_000_000.0);
97-
System.err.printf("Finished TornadoVM initialization...\n \n");
98-
}
88+
RunMetrics.setTornadoMetrics(planCreationTime - startTime, warmupTime - planCreationTime, copyTime - warmupTime);
9989
}
10090

10191
// ── Activation graph ─────────────────────────────────────────────────────

0 commit comments

Comments
 (0)