Commit 75b82e2

Merge pull request #588 from SKaiNET-developers/feature/dsl-lazy-zero-init
feat(dsl): lazy zero-init for parameter placeholders
2 parents 62cf5ce + 6eda5d2 commit 75b82e2

6 files changed

Lines changed: 266 additions & 10 deletions


skainet-lang/skainet-lang-core/src/commonMain/kotlin/sk/ainet/context/ExecutionContext.kt

Lines changed: 14 additions & 0 deletions
@@ -64,6 +64,20 @@ public interface ExecutionContext {
         return fromData(data, dtype)
     }
 
+    /**
+     * Lazy-initialized zero tensor — see [TensorDataFactory.placeholder].
+     * The underlying primitive array allocates on first read; if the parameter
+     * is replaced before any read (the common case for DSL modules whose weights
+     * are loaded from disk), the allocation is skipped entirely.
+     */
+    public fun <T : DType, V> placeholder(
+        shape: Shape,
+        dtype: KClass<T>
+    ): Tensor<T, V> {
+        val data = tensorDataFactory.placeholder<T, V>(shape, dtype)
+        return fromData(data, dtype)
+    }
+
     public fun <T : DType, V> ones(
         shape: Shape,
         dtype: KClass<T>
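
At the call site the new entry point is a drop-in for `zeros`. A minimal sketch of the intended usage, assuming an `ExecutionContext` instance (`ctx` below) is already in scope; how one is obtained is outside this diff:

    // Sketch only: `ctx` is an assumed in-scope ExecutionContext.
    // Both calls yield a tensor that reads as all zeros.
    val eager = ctx.zeros<FP32, Float>(Shape(1024, 1024), FP32::class)       // ~4 MB FloatArray allocated immediately
    val lazy = ctx.placeholder<FP32, Float>(Shape(1024, 1024), FP32::class)  // no primitive array allocated yet

    // If `lazy` is replaced before any read, its backing array never materializes.
    // If it is read, it materializes to zeros and behaves exactly like `eager`.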

skainet-lang/skainet-lang-core/src/commonMain/kotlin/sk/ainet/lang/nn/dsl/NetworkBuilder.kt

Lines changed: 10 additions & 10 deletions
@@ -621,7 +621,7 @@ private fun <T : DType, V> createLinear(
 
         myInitWeights == null && myInitBias != null -> {
 
-            val safeWeights = executionContext.tensorDataFactory.zeros<T, V>(Shape(outFeatures, inFeatures), kClass)
+            val safeWeights = executionContext.tensorDataFactory.placeholder<T, V>(Shape(outFeatures, inFeatures), kClass)
             val initW = executionContext.fromData(safeWeights, kClass)
 
             Linear(
@@ -635,7 +635,7 @@ private fun <T : DType, V> createLinear(
         }
 
         myInitWeights != null && myInitBias == null -> {
-            val safeBias = executionContext.tensorDataFactory.zeros<T, V>(Shape(outFeatures), kClass)
+            val safeBias = executionContext.tensorDataFactory.placeholder<T, V>(Shape(outFeatures), kClass)
             val initB = executionContext.fromData(safeBias, kClass)
 
             Linear(
@@ -649,8 +649,8 @@ private fun <T : DType, V> createLinear(
         }
 
         else -> {
-            val safeWeights = executionContext.tensorDataFactory.zeros<T, V>(Shape(outFeatures, inFeatures), kClass)
-            val safeBias = executionContext.tensorDataFactory.zeros<T, V>(Shape(outFeatures), kClass)
+            val safeWeights = executionContext.tensorDataFactory.placeholder<T, V>(Shape(outFeatures, inFeatures), kClass)
+            val safeBias = executionContext.tensorDataFactory.placeholder<T, V>(Shape(outFeatures), kClass)
             val initW = executionContext.fromData(safeWeights, kClass)
             val initB = executionContext.fromData(safeBias, kClass)
 
@@ -792,10 +792,10 @@ public class Conv2dImpl<T : DType, V>(
         require(inChannels > 0) { "Conv2d inChannels must be > 0 (set explicitly if not inferred)." }
 
         // Create default tensors if not provided
-        val weights = weightsValue ?: executionContext.zeros(weightsShape, kClass)
+        val weights = weightsValue ?: executionContext.placeholder(weightsShape, kClass)
 
         val biasParam = if (bias) {
-            biasValue ?: executionContext.zeros(biasShape, kClass)
+            biasValue ?: executionContext.placeholder(biasShape, kClass)
         } else null
 
         return Conv2d(
@@ -921,8 +921,8 @@ public class Conv1dImpl<T : DType, V>(
         require(kernelSize > 0) { "Conv1d kernelSize must be > 0." }
         require(inChannels > 0) { "Conv1d inChannels must be > 0." }
 
-        val weights = weightsValue ?: executionContext.zeros(weightsShape, kClass)
-        val biasParam = if (bias) biasValue ?: executionContext.zeros(biasShape, kClass) else null
+        val weights = weightsValue ?: executionContext.placeholder(weightsShape, kClass)
+        val biasParam = if (bias) biasValue ?: executionContext.placeholder(biasShape, kClass) else null
 
         return Conv1d(
             inChannels = inChannels,
@@ -993,8 +993,8 @@ public class Conv3dImpl<T : DType, V>(
         require(kernelSize.first > 0 && kernelSize.second > 0 && kernelSize.third > 0) { "Conv3d kernelSize must be > 0." }
         require(inChannels > 0) { "Conv3d inChannels must be > 0." }
 
-        val weights = weightsValue ?: executionContext.zeros(weightsShape, kClass)
-        val biasParam = if (bias) biasValue ?: executionContext.zeros(biasShape, kClass) else null
+        val weights = weightsValue ?: executionContext.placeholder(weightsShape, kClass)
+        val biasParam = if (bias) biasValue ?: executionContext.placeholder(biasShape, kClass) else null
 
         return Conv3d(
             inChannels = inChannels,
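
All six call sites change mechanically from `zeros` to `placeholder`; none of them read the tensor before handing it to the module as an initial parameter value. A hedged sketch of the two lifecycles this enables, using only the factory API from this PR (`loadedTensor` is hypothetical):

    val factory = DenseTensorDataFactory()
    val weights = factory.placeholder<FP32, Float>(Shape(128, 64), FP32::class)

    // Lifecycle A (the common case): a weight loader substitutes the parameter
    // before any read, e.g. `parameter.value = loadedTensor` as described in the
    // KDoc of the new lazy classes. The placeholder becomes unreachable and its
    // FloatArray(128 * 64) is never allocated.

    // Lifecycle B: no loader runs. The first read fires the lazy and
    // materializes zeros, matching the old zeros() behavior exactly.
    val w00 = weights[0, 0]  // 0.0f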

skainet-lang/skainet-lang-core/src/commonMain/kotlin/sk/ainet/lang/tensor/data/DenseTensorDataFactory.kt

Lines changed: 16 additions & 0 deletions
@@ -358,6 +358,22 @@ public class DenseTensorDataFactory: TensorDataFactory {
         }
     }
 
+    /**
+     * Returns a [LazyZeroFloatArrayTensorData] / [LazyZeroIntArrayTensorData] for FP32 /
+     * FP16 / Int32. The underlying primitive array materializes only on the first
+     * `get`/`set`/`buffer` access. For Int8 (byte-backed) we currently fall back to
+     * [zeros]; the eager byte allocation is rarely the dominant cost on real models.
+     */
+    override fun <T : DType, V> placeholder(shape: Shape, dtype: KClass<T>): TensorData<T, V> {
+        @Suppress("UNCHECKED_CAST")
+        return when (dtype) {
+            FP32::class -> LazyZeroFloatArrayTensorData<T>(shape) as TensorData<T, V>
+            FP16::class -> LazyZeroFloatArrayTensorData<T>(shape) as TensorData<T, V>
+            Int32::class -> LazyZeroIntArrayTensorData<T>(shape) as TensorData<T, V>
+            else -> zeros(shape, dtype)
+        }
+    }
+
     override fun <T : DType, V> ones(shape: Shape, dtype: KClass<T>): TensorData<T, V> {
         @Suppress("UNCHECKED_CAST")
         return when (dtype) {
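
Dispatch is on the `KClass` dtype token, so laziness is decided per call. A short sketch mirroring the behavior the new test file pins (the `Float` value type for FP16 is an assumption here, inferred from its float-backed lazy variant):

    val f = DenseTensorDataFactory()
    val fp32 = f.placeholder<FP32, Float>(Shape(10), FP32::class)  // lazy FloatArray
    val fp16 = f.placeholder<FP16, Float>(Shape(10), FP16::class)  // lazy, float-backed like FP32 (value type assumed)
    val i32 = f.placeholder<Int32, Int>(Shape(10), Int32::class)   // lazy IntArray
    val i8 = f.placeholder<Int8, Byte>(Shape(10), Int8::class)     // eager: falls back to zeros()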
Lines changed: 90 additions & 0 deletions
@@ -0,0 +1,90 @@
package sk.ainet.lang.tensor.data

import sk.ainet.lang.tensor.Shape
import sk.ainet.lang.tensor.storage.ActiveMemoryTracker
import sk.ainet.lang.types.DType

/**
 * Zero-allocation [FloatArrayTensorData] whose underlying [FloatArray] materializes
 * lazily on first read.
 *
 * Use when a parameter tensor is going to be replaced before any forward / backward
 * pass — e.g. immediately after the DSL builds a `Linear`/`Embedding`/`Conv` module
 * the loader's `WeightMapper.applyWeights` substitutes the entire `Tensor` via
 * `parameter.value = loadedTensor`. The placeholder is then GC'd before its lazy
 * fires, eliminating the eager `FloatArray(shape.volume)` cost.
 *
 * Behavior is identical to [DenseFloatArrayTensorData] backed by a zero-filled
 * `FloatArray` for any consumer that doesn't substitute first — the lazy
 * materializes to zeros on the first `get`/`set`/`buffer` access and is then
 * cached, so repeated reads return the same values that an eager zero allocation
 * would have produced.
 */
public class LazyZeroFloatArrayTensorData<T : DType>(
    initialShape: Shape
) : FloatArrayTensorData<T> {
    override val shape: Shape = Shape(initialShape.dimensions.copyOf())
    private val strides: IntArray = this.shape.computeStrides()

    private val backing: FloatArray by lazy {
        ActiveMemoryTracker.recordCopy(
            "LazyZeroFloatArrayTensorData.materialize",
            shape.volume.toLong() * 4
        )
        FloatArray(shape.volume)
    }

    override val buffer: FloatArray
        get() = backing

    override fun get(vararg indices: Int): Float =
        backing[calcFlatIndex(shape, strides, indices)]

    override fun set(vararg indices: Int, value: Float) {
        backing[calcFlatIndex(shape, strides, indices)] = value
    }
}

/**
 * Zero-allocation [IntArrayTensorData] whose backing [IntArray] materializes
 * lazily on first read. See [LazyZeroFloatArrayTensorData].
 */
public class LazyZeroIntArrayTensorData<T : DType>(
    initialShape: Shape
) : IntArrayTensorData<T> {
    override val shape: Shape = Shape(initialShape.dimensions.copyOf())
    private val strides: IntArray = this.shape.computeStrides()

    private val backing: IntArray by lazy {
        ActiveMemoryTracker.recordCopy(
            "LazyZeroIntArrayTensorData.materialize",
            shape.volume.toLong() * 4
        )
        IntArray(shape.volume)
    }

    override val buffer: IntArray
        get() = backing

    override fun get(vararg indices: Int): Int =
        backing[calcFlatIndex(shape, strides, indices)]

    override fun set(vararg indices: Int, value: Int) {
        backing[calcFlatIndex(shape, strides, indices)] = value
    }
}

private fun calcFlatIndex(shape: Shape, strides: IntArray, indices: IntArray): Int {
    require(indices.size == shape.dimensions.size) {
        "Number of indices (${indices.size}) must match tensor dimensions (${shape.dimensions.size})"
    }
    var flat = 0
    for (i in indices.indices) {
        val idx = indices[i]
        require(idx >= 0 && idx < shape.dimensions[i]) {
            "Index $idx out of bounds for dimension $i with size ${shape.dimensions[i]}"
        }
        flat += idx * strides[i]
    }
    return flat
}
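
The mechanism is Kotlin's `by lazy` delegate: the initializer runs at most once, on first access, and the result is cached; the default mode is `LazyThreadSafetyMode.SYNCHRONIZED`, so concurrent first reads still see a single backing array. A self-contained miniature of the same pattern, using only the standard library (none of the SKaiNET types):

    class LazyBuffer(private val size: Int) {
        var materialized = false
            private set

        private val backing: FloatArray by lazy {
            materialized = true  // side effect shows when the initializer ran
            FloatArray(size)     // the deferred allocation
        }

        operator fun get(i: Int): Float = backing[i]
    }

    fun main() {
        val buf = LazyBuffer(1_000_000)
        println(buf.materialized)  // false: construction allocated nothing
        println(buf[0])            // 0.0: first read fires the lazy
        println(buf.materialized)  // true: and the array stays cached
    }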

skainet-lang/skainet-lang-core/src/commonMain/kotlin/sk/ainet/lang/tensor/data/TensorDataFactory.kt

Lines changed: 21 additions & 0 deletions
@@ -11,6 +11,27 @@ import kotlin.reflect.KClass
  */
 public interface TensorDataFactory {
     public fun <T : DType, V> zeros(shape: Shape, dtype: KClass<T>): TensorData<T, V>
+
+    /**
+     * Allocates a zero-filled tensor whose underlying storage materializes lazily
+     * on first read.
+     *
+     * Behavior is identical to [zeros] for any caller that reads the tensor — a
+     * fresh zero buffer is produced on first access and cached for subsequent
+     * reads. The benefit is for callers that **never** read the tensor before
+     * replacing it, which is the common case in DSL-built modules whose
+     * parameters get substituted by a downstream weight loader (e.g.
+     * `WeightMapper.applyWeights` sets `parameter.value = loadedTensor`). For
+     * those callers, the `FloatArray(shape.volume)` allocation never happens.
+     *
+     * The default implementation falls back to [zeros], preserving existing
+     * behavior for any custom factory that does not opt in. Implementations
+     * that have a meaningful lazy form (e.g. [DenseTensorDataFactory]) should
+     * override.
+     */
+    public fun <T : DType, V> placeholder(shape: Shape, dtype: KClass<T>): TensorData<T, V> =
+        zeros(shape, dtype)
+
     public fun <T : DType, V> ones(shape: Shape, dtype: KClass<T>): TensorData<T, V>
     public fun <T : DType, V> full(shape: Shape, dtype: KClass<T>, value: Number): TensorData<T, V>
     public fun <T : DType, V> randn(
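
The interface default is what keeps the change non-breaking: existing factories compile and behave exactly as before, and only implementations that override gain laziness. The same pattern in miniature (hypothetical `MiniFactory`, not a SKaiNET type):

    interface MiniFactory {
        fun zeros(size: Int): FloatArray
        // Interface default: placeholder is indistinguishable from zeros
        // unless an implementation opts in with an override.
        fun placeholder(size: Int): FloatArray = zeros(size)
    }

    // An existing factory that never heard of placeholder() keeps working:
    // placeholder() silently delegates to its zeros().
    class EagerFactory : MiniFactory {
        override fun zeros(size: Int) = FloatArray(size)
    }

    fun main() {
        println(EagerFactory().placeholder(8).size)  // 8, eagerly allocated
    }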
Lines changed: 115 additions & 0 deletions
@@ -0,0 +1,115 @@
package sk.ainet.lang.tensor.data

import sk.ainet.lang.tensor.Shape
import sk.ainet.lang.types.FP32
import sk.ainet.lang.types.Int32
import sk.ainet.lang.types.Int8
import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertNotSame
import kotlin.test.assertSame

/**
 * Pins the contract for [TensorDataFactory.placeholder]:
 *
 * 1. Reports the requested shape without touching the underlying buffer.
 * 2. Materializes to zeros on the first read — value parity with [zeros].
 * 3. Caches the materialized buffer (no re-allocation across reads).
 *
 * The benefit (deferred allocation) doesn't show up directly in unit tests, but
 * the parity guarantee means any caller that *does* read the tensor sees the
 * same values an eager [zeros] call would have produced — so dropping in
 * `placeholder` for `zeros` in DSL parameter init is a strict improvement.
 */
class PlaceholderTensorDataTest {

    private val factory = DenseTensorDataFactory()

    @Test
    fun placeholder_reports_shape_without_materializing() {
        val shape = Shape(64, 64)
        val td = factory.placeholder<FP32, Float>(shape, FP32::class)

        // Reading shape must not require allocating the underlying buffer.
        assertEquals(shape, td.shape)
        // Returned shape is a defensive copy — mutating one shouldn't affect the
        // factory-issued tensor's view.
        assertEquals(64, td.shape.dimensions[0])
        assertEquals(64, td.shape.dimensions[1])
    }

    @Test
    fun placeholder_materializes_to_zeros_on_first_read_fp32() {
        val td = factory.placeholder<FP32, Float>(Shape(2, 3), FP32::class)

        // Every position reads as 0.0f — same as zeros().
        for (i in 0 until 2) for (j in 0 until 3) {
            assertEquals(0.0f, td[i, j], "[$i,$j] must be 0.0f on first read")
        }
    }

    @Test
    fun placeholder_supports_writes_and_reads_back_fp32() {
        val td = factory.placeholder<FP32, Float>(Shape(4), FP32::class)

        td[2] = 7.5f
        assertEquals(7.5f, td[2])
        assertEquals(0.0f, td[0])
        assertEquals(0.0f, td[3])
    }

    @Test
    fun placeholder_buffer_is_stable_across_reads() {
        val td = factory.placeholder<FP32, Float>(Shape(8), FP32::class)
            as FloatArrayTensorData<FP32>

        val first = td.buffer
        val second = td.buffer
        // Same backing FloatArray on every access — the lazy fires once.
        assertSame(first, second, "buffer must be cached after first materialization")
    }

    @Test
    fun placeholder_value_parity_with_zeros_fp32() {
        val shape = Shape(5, 7)
        val placeholder = factory.placeholder<FP32, Float>(shape, FP32::class)
        val zeros = factory.zeros<FP32, Float>(shape, FP32::class)

        for (i in 0 until 5) for (j in 0 until 7) {
            assertEquals(zeros[i, j], placeholder[i, j],
                "placeholder must match zeros at [$i,$j]")
        }
    }

    @Test
    fun placeholder_int32_materializes_to_zeros() {
        val td = factory.placeholder<Int32, Int>(Shape(3), Int32::class)
        assertEquals(0, td[0])
        assertEquals(0, td[1])
        assertEquals(0, td[2])
    }

    @Test
    fun placeholder_int8_falls_back_to_zeros() {
        // Int8 has no lazy variant — falls back to eager zeros. The test pins
        // the value contract; it shouldn't throw and reads must be 0.
        val td = factory.placeholder<Int8, Byte>(Shape(4), Int8::class)
        for (i in 0 until 4) {
            assertEquals(0.toByte(), td[i])
        }
    }

    @Test
    fun placeholder_returns_distinct_instances() {
        // Two placeholder calls must not share underlying state — separate Linear
        // layers must not see each other's writes.
        val a = factory.placeholder<FP32, Float>(Shape(4), FP32::class)
            as FloatArrayTensorData<FP32>
        val b = factory.placeholder<FP32, Float>(Shape(4), FP32::class)
            as FloatArrayTensorData<FP32>

        assertNotSame(a.buffer, b.buffer)
        a[0] = 99.0f
        assertEquals(0.0f, b[0], "placeholder b must not see writes to placeholder a")
    }
}
