SciSharp
diff --git a/src/NumSharp.Core/Backends/Default/Math/BLAS/Default.MatMul.2D2D.cs b/src/NumSharp.Core/Backends/Default/Math/BLAS/Default.MatMul.2D2D.cs
Lines changed: 2 additions & 4 deletions
@@ -81,14 +81,12 @@ private static unsafe bool TryMatMulSimd(NDArray left, NDArray right, NDArray re
             {
                 case NPTypeCode.Single:
                 {
-                    var kernel = ILKernelGenerator.GetMatMulKernel<float>();
-                    if (kernel == null) return false;
-
                     float* a = (float*)left.Address;
                     float* b = (float*)right.Address;
                     float* c = (float*)result.Address;
 
-                    kernel(a, b, c, M, N, K);
+                    // Use cache-blocked implementation for better performance
+                    SimdMatMul.MatMulFloat(a, b, c, M, N, K);
                     return true;
                 }
Original file line number	Diff line number	Diff line change
`@@ -81,14 +81,12 @@ private static unsafe bool TryMatMulSimd(NDArray left, NDArray right, NDArray re`
`81`	`81`	`{`
`82`	`82`	`case NPTypeCode.Single:`
`83`	`83`	`{`
`84`		`- var kernel = ILKernelGenerator.GetMatMulKernel<float>();`
`85`		`- if (kernel == null) return false;`
`86`		`-`
`87`	`84`	`float* a = (float*)left.Address;`
`88`	`85`	`float* b = (float*)right.Address;`
`89`	`86`	`float* c = (float*)result.Address;`
`90`	`87`
`91`		`- kernel(a, b, c, M, N, K);`
	`88`	`+ // Use cache-blocked implementation for better performance`
	`89`	`+ SimdMatMul.MatMulFloat(a, b, c, M, N, K);`
`92`	`90`	`return true;`
`93`	`91`	`}`
`94`	`92`