Add initial implementation of Conv2d. Not working yet

michalharakal · michalharakal · commit a5de82f7ffcb · 2025-05-03T23:34:09.000+02:00
Related-To: #4
diff --git a/build.gradle.kts b/build.gradle.kts
@@ -10,7 +10,7 @@ plugins {
 
 allprojects {
     group = "sk.ai.net"
-    version = "0.0.5"
+    version = "0.0.6-SNAPSHOT"
 }
 
 moduleGraphConfig {
diff --git a/core/src/commonMain/kotlin/sk/ai/net/Shape.kt b/core/src/commonMain/kotlin/sk/ai/net/Shape.kt
@@ -1,5 +1,6 @@
 package sk.ai.net
 
+import sk.ai.net.impl.assert
 import sk.ai.net.impl.zipFold
 
 class Shape(vararg dimensions: Int) {
@@ -11,6 +12,35 @@ class Shape(vararg dimensions: Int) {
     val rank: Int
         get() = dimensions.size
 
+    /**
+     * Converts one index per axis into the row-major flat offset of that element.
+     *
+     * @param indices one index per dimension, each in `0 until` the size of its axis
+     * @return the flat offset, with the last axis varying fastest
+     */
+    internal fun index(indices: IntArray): Int {
+        assert(
+            { indices.size == dimensions.size },
+            { "`indices.size` must be ${dimensions.size}: ${indices.size}" })
+        return dimensions.zip(indices).fold(0) { offset, (dimension, idx) ->
+            assert(
+                { idx in 0 until dimension },
+                { "Illegal index: indices = ${indices.joinToString()}, shape = ${dimensions.joinToString()}" })
+            offset * dimension + idx
+        }
+    }
+
+    /**
+     * Returns the size of a single axis, e.g. `shape[1]` is the size of dimension 1.
+     *
+     * The parameter stays `vararg` so existing call sites keep compiling, but exactly one axis
+     * index must be supplied.
+     */
+    operator fun get(vararg indices: Int): Int {
+        require(indices.size == 1) { "Shape.get expects exactly one axis index, but got ${indices.size}" }
+        return dimensions[indices[0]]
+    }
+
     override fun equals(other: Any?): Boolean {
         if (other !is Shape) {
             return false
diff --git a/core/src/commonMain/kotlin/sk/ai/net/Tensor.kt b/core/src/commonMain/kotlin/sk/ai/net/Tensor.kt
@@ -63,30 +63,29 @@ interface Tensor {
     fun cos(): Tensor
 
     fun tan(): Tensor
-    
+
     fun asin(): Tensor
 
     fun acos(): Tensor
-    
-    fun atan(): Tensor
 
-    fun sinh():Tensor
+    fun atan(): Tensor
 
-    fun cosh():Tensor
+    fun sinh(): Tensor
 
-    fun tanh():Tensor
+    fun cosh(): Tensor
 
-    fun exp():Tensor
+    fun tanh(): Tensor
 
-    fun log():Tensor
+    fun exp(): Tensor
 
-    fun sqrt():Tensor
+    fun log(): Tensor
 
-    fun cbrt():Tensor
+    fun sqrt(): Tensor
 
-    fun sigmoid():Tensor
+    fun cbrt(): Tensor
 
-    fun ln():Tensor
+    fun sigmoid(): Tensor
 
+    fun ln(): Tensor
 }
 
diff --git a/core/src/commonMain/kotlin/sk/ai/net/TensorFactory.kt b/core/src/commonMain/kotlin/sk/ai/net/TensorFactory.kt
@@ -1,5 +1,15 @@
 package sk.ai.net
 
+import sk.ai.net.impl.BuiltInDoubleDataDescriptor
+import sk.ai.net.impl.DoublesTensor
+import kotlin.random.Random
+
 interface TensorFactory {
     fun createTensor(shape: Shape, dataDescriptor: DataDescriptor, elements: DoubleArray): Tensor
-}
+}
+
+/** Builds a tensor of [shape] filled with pseudo-random values of at least 0 and below 1; [dataDescriptor] is not used yet. */
+fun rand(shape: Shape, dataDescriptor: DataDescriptor = BuiltInDoubleDataDescriptor()): Tensor {
+    val samples = DoubleArray(shape.volume) { Random.Default.nextFloat().toDouble() }
+    return DoublesTensor(shape, samples)
+}
diff --git a/core/src/commonMain/kotlin/sk/ai/net/impl/DoublesTensor.kt b/core/src/commonMain/kotlin/sk/ai/net/impl/DoublesTensor.kt
@@ -6,6 +6,7 @@ import sk.ai.net.Tensor
 import kotlin.collections.map
 import kotlin.math.exp
 import kotlin.math.pow
+import kotlin.random.Random
 
 data class DoublesTensor(override val shape: Shape, val elements: DoubleArray) : TypedTensor<Double> {
     constructor(shape: Shape, element: Double = 0.0) : this(
diff --git a/core/src/commonMain/kotlin/sk/ai/net/nn/Conv2d.kt b/core/src/commonMain/kotlin/sk/ai/net/nn/Conv2d.kt
@@ -0,0 +1,124 @@
+package sk.ai.net.nn
+
+import sk.ai.net.Shape
+import sk.ai.net.Tensor
+import sk.ai.net.impl.DoublesTensor
+import sk.ai.net.rand
+import kotlin.math.sqrt
+
+/**
+ * 2D convolution layer operating on row-major [DoublesTensor] data.
+ *
+ * The weight has shape (outChannels, inChannels, kernelSize, kernelSize); it and the optional bias of
+ * shape (outChannels) are initialised uniformly between -bound and +bound, where
+ * bound = 1 / sqrt(inChannels * kernelSize * kernelSize).
+ *
+ * @param inChannels number of channels expected in the input
+ * @param outChannels number of channels produced
+ * @param kernelSize side length of the square kernel
+ * @param stride step between kernel applications along both spatial axes
+ * @param padding number of implicit zero rows/columns on every spatial border
+ * @param useBias whether a per-output-channel bias is created and added
+ * @throws IllegalArgumentException if a size or the stride is not positive, or [padding] is negative
+ */
+class Conv2d(
+    val inChannels: Int,
+    val outChannels: Int,
+    val kernelSize: Int,
+    val stride: Int = 1,
+    val padding: Int = 0,
+    useBias: Boolean = true
+) {
+    val weight: Tensor
+    val bias: Tensor?
+
+    init {
+        require(inChannels > 0 && outChannels > 0) {
+            "Conv2d channel counts must be positive (inChannels=$inChannels, outChannels=$outChannels)."
+        }
+        require(kernelSize > 0 && stride > 0 && padding >= 0) {
+            "Conv2d requires kernelSize > 0, stride > 0 and padding >= 0 " +
+                "(kernelSize=$kernelSize, stride=$stride, padding=$padding)."
+        }
+        val fanIn = inChannels * kernelSize * kernelSize
+        val bound = 1.0 / sqrt(fanIn.toDouble())
+        weight = uniform(Shape(outChannels, inChannels, kernelSize, kernelSize), bound)
+        bias = if (useBias) uniform(Shape(outChannels), bound) else null
+    }
+
+    /** Rescales the samples produced by [rand] from the unit interval to the range -bound..bound. */
+    private fun uniform(shape: Shape, bound: Double): DoublesTensor {
+        val samples = (rand(shape) as DoublesTensor).elements
+        return DoublesTensor(shape, DoubleArray(samples.size) { samples[it] * 2.0 * bound - bound })
+    }
+
+    /**
+     * Applies the convolution to [input].
+     *
+     * @param input a [DoublesTensor] of shape (C, H, W) or (N, C, H, W); a 3D input is treated as a batch of one
+     * @return a tensor of shape (N, outChannels, outH, outW); the batch axis is present even for 3D input
+     * @throws IllegalArgumentException if [input] is not a [DoublesTensor], has the wrong rank or channel
+     * count, or is smaller than the kernel after padding
+     */
+    operator fun invoke(input: Tensor): Tensor {
+        val shape = input.shape
+        require(shape.rank == 3 || shape.rank == 4) {
+            "Conv2d expected 3D or 4D input tensor, but got shape ${shape}."
+        }
+        val source = (input as? DoublesTensor)?.elements
+            ?: throw IllegalArgumentException("Conv2d currently supports only DoublesTensor input.")
+
+        // Read the trailing three axes so 3D (C, H, W) and 4D (N, C, H, W) inputs share one code path;
+        // both have the same flat layout when the batch size is 1.
+        val dims = shape.dimensions
+        val batchSize = if (shape.rank == 4) dims[0] else 1
+        val inC = dims[shape.rank - 3]
+        val inH = dims[shape.rank - 2]
+        val inW = dims[shape.rank - 1]
+        require(inC == inChannels) {
+            "Conv2d expected input channel count $inChannels, but got $inC."
+        }
+
+        // Integer division truncates toward zero, so a kernel larger than the padded input must be
+        // rejected explicitly: with stride > 1 the formula below could otherwise still yield 1.
+        val spanH = inH + 2 * padding - kernelSize
+        val spanW = inW + 2 * padding - kernelSize
+        val outH = spanH / stride + 1
+        val outW = spanW / stride + 1
+        require(spanH >= 0 && spanW >= 0) {
+            "Conv2d output size is invalid (outH=$outH, outW=$outW). Check input dimensions and padding."
+        }
+
+        val kernel = (weight as DoublesTensor).elements
+        val biasValues = (bias as? DoublesTensor)?.elements
+        val result = DoubleArray(batchSize * outChannels * outH * outW)
+        var cursor = 0  // results are produced in row-major (n, oc, i, j) order
+        for (n in 0 until batchSize) {
+            for (oc in 0 until outChannels) {
+                val biasVal = biasValues?.get(oc) ?: 0.0
+                for (i in 0 until outH) {
+                    for (j in 0 until outW) {
+                        var sum = 0.0
+                        for (c in 0 until inChannels) {
+                            val sourcePlane = (n * inChannels + c) * inH
+                            val kernelPlane = (oc * inChannels + c) * kernelSize
+                            for (ki in 0 until kernelSize) {
+                                // Positions inside the zero padding contribute nothing, so skip them.
+                                val row = i * stride + ki - padding
+                                if (row < 0 || row >= inH) continue
+                                for (kj in 0 until kernelSize) {
+                                    val col = j * stride + kj - padding
+                                    if (col < 0 || col >= inW) continue
+                                    sum += source[(sourcePlane + row) * inW + col] *
+                                            kernel[(kernelPlane + ki) * kernelSize + kj]
+                                }
+                            }
+                        }
+                        result[cursor++] = sum + biasVal
+                    }
+                }
+            }
+        }
+        return DoublesTensor(Shape(batchSize, outChannels, outH, outW), result)
+    }
+}
diff --git a/gguf/src/commonMain/kotlin/sk/ai/net/gguf/GGUFReader.kt b/gguf/src/commonMain/kotlin/sk/ai/net/gguf/GGUFReader.kt
@@ -92,8 +92,8 @@ class GGUFReader(source: Source) {
     }
 
     /** Retrieve a metadata field as a list of Strings (for array-of-string fields) */
-    fun getStringList(key: String): List<String>? {
-        val field = this.fields[key] ?: return null
+    fun getStringList(key: String): List<String> {
+        val field = this.fields[key] ?: return emptyList()
         // Expect an array of strings: types[0] == ARRAY and types[1] == STRING (per format)
         if (field.types.size >= 2 &&
             field.types[0] == GGUFValueType.ARRAY && field.types[1] == GGUFValueType.STRING
@@ -104,7 +104,7 @@ class GGUFReader(source: Source) {
                 byteList.toUByteArray().toByteArray().decodeToString()
             }
         }
-        return null  // Not an array-of-strings field
+        return emptyList()  // Not an array-of-strings field
     }
 
 
diff --git a/gguf/src/jvmTest/kotlin/sk/ai/net/gguf/GGUFStringReaderTest.kt b/gguf/src/jvmTest/kotlin/sk/ai/net/gguf/GGUFStringReaderTest.kt
@@ -0,0 +1,30 @@
+package sk.ai.net.gguf
+
+import kotlinx.io.asSource
+import kotlinx.io.buffered
+import org.junit.Assert.assertEquals
+import org.junit.Test
+
+/** Checks that [GGUFReader] exposes string and string-list metadata read from a bundled sample file. */
+class GGUFReaderTest {
+
+    @Test
+    fun testReadMetadataFields() {
+        // Fail with a clear message instead of a NullPointerException when the fixture is missing.
+        val resource = checkNotNull(javaClass.getResourceAsStream("/skainet-small.gguf")) {
+            "Test resource /skainet-small.gguf was not found on the classpath"
+        }
+        resource.use { inputStream ->
+            val reader = GGUFReader(inputStream.asSource().buffered())
+
+            // Verify the 'model_name' metadata is correct
+            val modelName = reader.getString("model_name")
+            assertEquals("model_name should match", "skainet-small", modelName)
+
+            // Verify the 'authors' metadata list has the expected number of entries
+            val authorsList = reader.getStringList("authors")
+            assertEquals("authors list should match", 2, authorsList.size)
+            // TODO: also assert the list contents once the expected author names are confirmed.
+        }
+    }
+}

Original file line number	Diff line number	Diff line change
`@@ -10,7 +10,7 @@ plugins {`
`10`	`10`
`11`	`11`	`allprojects {`
`12`	`12`	`group = "sk.ai.net"`
`13`		`- version = "0.0.5"`
	`13`	`+ version = "0.0.6-SNAPSHOT"`
`14`	`14`	`}`
`15`	`15`
`16`	`16`	`moduleGraphConfig {`
Original file line number	Diff line number	Diff line change
`@@ -92,8 +92,8 @@ class GGUFReader(source: Source) {`
`92`	`92`	`}`
`93`	`93`
`94`	`94`	`/** Retrieve a metadata field as a list of Strings (for array-of-string fields) */`
`95`		`- fun getStringList(key: String): List<String>? {`
`96`		`- val field = this.fields[key] ?: return null`
	`95`	`+ fun getStringList(key: String): List<String> {`
	`96`	`+ val field = this.fields[key] ?: return emptyList()`
`97`	`97`	`// Expect an array of strings: types[0] == ARRAY and types[1] == STRING (per format)`
`98`	`98`	`if (field.types.size >= 2 &&`
`99`	`99`	`field.types[0] == GGUFValueType.ARRAY && field.types[1] == GGUFValueType.STRING`
`@@ -104,7 +104,7 @@ class GGUFReader(source: Source) {`
`104`	`104`	`byteList.toUByteArray().toByteArray().decodeToString()`
`105`	`105`	`}`
`106`	`106`	`}`
`107`		`- return null // Not an array-of-strings field`
	`107`	`+ return emptyList() // Not an array-of-strings field`
`108`	`108`	`}`
`109`	`109`
`110`	`110`