@@ -3,12 +3,17 @@ package sk.ainet.exec.tensor.ops
33import kotlin.math.abs
44import kotlin.random.Random
55import kotlin.test.Test
6+ import kotlin.test.assertEquals
67import kotlin.test.assertTrue
78import sk.ainet.context.DirectCpuExecutionContext
89import sk.ainet.lang.tensor.Shape
10+ import sk.ainet.lang.tensor.data.Q4_0BlockTensorData
911import sk.ainet.lang.tensor.data.Q4_KBlockTensorData
12+ import sk.ainet.lang.tensor.data.Q5_0BlockTensorData
1013import sk.ainet.lang.tensor.data.Q5_1BlockTensorData
14+ import sk.ainet.lang.tensor.data.Q5_KBlockTensorData
1115import sk.ainet.lang.tensor.data.Q6_KBlockTensorData
16+ import sk.ainet.lang.tensor.data.Q8_0BlockTensorData
1217import sk.ainet.lang.tensor.data.TensorData
1318import sk.ainet.lang.types.FP32
1419
@@ -129,4 +134,40 @@ class PackedMatmulDispatchTest {
/** Q5_1 case (inDim 128, outDim 16, seed 7): delegates to this class's `run` helper, defined outside this view. */
@Test
fun q5_1_through_ops_matmul_transpose() =
    run("Q5_1", inDim = 128, outDim = 16, seed = 7)
/** Q4_K case (inDim 256, outDim 12, seed 8): delegates to this class's `run` helper, defined outside this view. */
@Test
fun q4_k_through_ops_matmul_transpose() =
    run("Q4_K", inDim = 256, outDim = 12, seed = 8)
/** Q6_K case (inDim 512, outDim 8, seed 9): delegates to this class's `run` helper, defined outside this view. */
@Test
fun q6_k_through_ops_matmul_transpose() =
    run("Q6_K", inDim = 512, outDim = 8, seed = 9)
137+
/**
 * Regression guard for transformers #178 (Q8_0/Q4_0 were the gaps).
 *
 * `ops.transpose` is expected to lazily rewrap every packed quant type that
 * can serve as a matmul weight (the full `chooseQuantizedMatmulHeap` set):
 * the shape is flipped while the packed bytes stay as they are. The failure
 * mode being guarded against is the generic FP32 path, which casts the
 * Byte-backed buffer to Float and throws `ClassCastException`.
 *
 * The check is content-agnostic: each weight is an all-zero byte buffer sized
 * from the format's block geometry, with exactly one block per row.
 */
@Test
fun transpose_preserves_every_packed_quant_type() {
    // Describes one packed format: display label, elements per block,
    // bytes per block, and how to wrap raw bytes as tensor data.
    class PackedFormat(
        val label: String,
        val elemsPerBlock: Int,
        val blockBytes: Int,
        val wrap: (Shape, ByteArray) -> TensorData<FP32, Float>,
    )

    val rows = 8
    val formats = listOf(
        PackedFormat("Q4_K", 256, 144) { shape, raw -> Q4_KBlockTensorData(shape, raw) as TensorData<FP32, Float> },
        PackedFormat("Q5_K", 256, 176) { shape, raw -> Q5_KBlockTensorData(shape, raw) as TensorData<FP32, Float> },
        PackedFormat("Q6_K", 256, 210) { shape, raw -> Q6_KBlockTensorData(shape, raw) as TensorData<FP32, Float> },
        PackedFormat("Q8_0", 32, 34) { shape, raw -> Q8_0BlockTensorData(shape, raw) as TensorData<FP32, Float> },
        PackedFormat("Q4_0", 32, 18) { shape, raw -> Q4_0BlockTensorData(shape, raw) as TensorData<FP32, Float> },
        PackedFormat("Q5_0", 32, 22) { shape, raw -> Q5_0BlockTensorData(shape, raw) as TensorData<FP32, Float> },
        PackedFormat("Q5_1", 32, 24) { shape, raw -> Q5_1BlockTensorData(shape, raw) as TensorData<FP32, Float> },
    )

    formats.forEach { fmt ->
        // One block per row, so the row width equals the block width.
        val cols = fmt.elemsPerBlock
        val blocksPerRow = cols / fmt.elemsPerBlock
        val packed = ByteArray(rows * blocksPerRow * fmt.blockBytes)
        val weight = ctx.fromData(fmt.wrap(Shape(rows, cols), packed), FP32::class)

        // Unhandled packed types used to throw on this call.
        val flipped = ctx.ops.transpose(weight)

        assertEquals(Shape(cols, rows), flipped.shape, "${fmt.label}: transpose did not flip shape")

        val dataClassName = flipped.data::class.simpleName
        assertTrue(
            dataClassName?.contains("Block") == true,
            "${fmt.label}: transpose dropped the packed encoding (got $dataClassName)",
        )
    }
}
132173}
0 commit comments