Skip to content

Commit aaffafb

Browse files
michalharakal and claude committed
test(gemma): end-to-end parity for load(NATIVE_OPTIMIZED) packed path
Extends GemmaQ5KPackedParityTest to also decode via GemmaNetworkLoader.load(NATIVE_OPTIMIZED) — the wired commonMain convertGemmaWeightsPacked (board) path, which uses no MemSeg/Arena.

All three paths (FP32 baseline, jvmMain MemSeg-packed, load() packed) produce the identical token sequence -> `<tool_0>(state="on")<end>` for the prompt "Turn the light on."

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent cb96e53 commit aaffafb

1 file changed

Lines changed: 15 additions & 0 deletions

File tree

llm-inference/gemma/src/jvmTest/kotlin/sk/ainet/models/gemma/GemmaQ5KPackedParityTest.kt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,5 +123,20 @@ class GemmaQ5KPackedParityTest {
123123

124124
assertEquals(genFp32, genNat, "Q5_K packed decode diverged from FP32 baseline")
125125
}
126+
127+
// The wired path: GemmaNetworkLoader.load(NATIVE_OPTIMIZED) applies the
128+
// commonMain convertGemmaWeightsPacked (the board path) — no MemSeg, no
129+
// Arena. Must decode identically to the FP32 baseline too.
130+
val mLoad = GemmaNetworkLoader.fromGguf(
131+
randomAccessProvider = { JvmRandomAccessSource.open(gguf) },
132+
quantPolicy = QuantPolicy.NATIVE_OPTIMIZED,
133+
).load<FP32, Float>(ctx)
134+
val rtLoad = OptimizedLLMRuntime(
135+
model = mLoad, ctx = ctx, mode = OptimizedLLMMode.DIRECT,
136+
dtype = FP32::class, bos = tokenizer.bosTokenId,
137+
)
138+
val genLoad = decode(rtLoad, promptTokens, maxNew, eos, eot)
139+
println("load(NATIVE_OPTIMIZED) gen=$genLoad")
140+
assertEquals(genFp32, genLoad, "load(NATIVE_OPTIMIZED) packed decode diverged from FP32 baseline")
126141
}
127142
}

0 commit comments

Comments
 (0)