SKaiNET-developers · michalharakal · Mar 9, 2026 · Mar 8, 2026 · Mar 8, 2026 · Mar 8, 2026
diff --git a/README.md b/README.md
@@ -0,0 +1,44 @@
+# SKaiNET-transformers
+
+SKaiNET-transformers is a high-performance LLM (Large Language Model) application layer built on top of the [SKaiNET](https://github.com/SKaiNET-developers/SKaiNET.git) engine. It provides a set of runtimes and CLI tools for various transformer-based models, optimized for Kotlin Multiplatform.
+
+## Key Features
+
+- **Multi-Model Support**: Implementations for popular architectures including Llama, Gemma, Qwen, and BERT.
+- **Engineered for Performance**: Uses the SKaiNET library as its core inference engine, leveraging hardware acceleration where available.
+- **Kotlin Multiplatform**: Designed to run across different platforms (JVM, Native, Android, etc.).
+- **Efficient Weights Loading**: Support for `safetensors` format for fast and safe model loading.
+
+## Project Structure
+
+- `llm-core`: Core abstractions and base classes for LLM components.
+- `llm-inference`: Model-specific inference logic (Llama, BERT, Gemma, Qwen).
+- `llm-runtime`: Platform-specific runtime implementations.
+- `llm-apps`: Ready-to-use CLI applications for model interaction and testing.
+- `llm-agent`: High-level agentic capabilities (in development).
+
+## Getting Started
+
+### Prerequisites
+
+- JDK 21 or higher (the build script rejects older JDKs)
+- Gradle
+
+### Running the CLI Tools
+
+You can run the provided CLI tools using Gradle. For example, to run the BERT CLI:
+
+```bash
+./gradlew :llm-apps:kbert-cli:run --args="/path/to/model-dir 'query text'"
+```
+
+Replace `/path/to/model-dir` with a directory containing `model.safetensors`, `vocab.txt`, and `config.json`.
+
+## Engine
+
+This project uses **SKaiNET** as its underlying execution engine. 
+GitHub: [https://github.com/SKaiNET-developers/SKaiNET](https://github.com/SKaiNET-developers/SKaiNET.git)
+
+## License
+
+This project is released under the [MIT License](https://mit-license.org/).
diff --git a/build.gradle.kts b/build.gradle.kts
@@ -14,7 +14,8 @@ allprojects {
     group = "sk.ainet.llm"
 }
 
-// Require JDK 21+ but allow any newer version (produces Java 21 bytecode via --release / jvmTarget)
+// Requires JDK 21 or newer to build; JDK 25 is recommended (e.g. set via `jenv local 25.0`).
+// Emits Java 21 bytecode via --release / jvmTarget for backward compatibility.
 subprojects {
     require(JavaVersion.current() >= JavaVersion.VERSION_21) {
         "This project requires JDK 21+, but found ${JavaVersion.current()}"

diff --git a/gradle.properties b/gradle.properties
@@ -1,12 +1,12 @@
-GROUP=sk.ainet.llm
-VERSION_NAME=0.1.0
+GROUP=sk.ainet.transformers
+VERSION_NAME=0.3.0
 
-POM_DESCRIPTION=SKaiNET-LLM
+POM_DESCRIPTION=SKaiNET-transformers
 
-POM_URL=https://github.com/SKaiNET-developers/skainet/
-POM_SCM_URL=https://github.com/SKaiNET-developers/skainet
-POM_SCM_CONNECTION=scm:git:git@github.com:SKaiNET-developers/skainet.git
-POM_SCM_DEV_CONNECTION=scm:git:ssh@github.com:SKaiNET-developers/skainet.git
+POM_URL=https://github.com/SKaiNET-developers/SKaiNET-transformers/
+POM_SCM_URL=https://github.com/SKaiNET-developers/SKaiNET-transformers
+POM_SCM_CONNECTION=scm:git:git@github.com:SKaiNET-developers/SKaiNET-transformers.git
+POM_SCM_DEV_CONNECTION=scm:git:ssh@github.com:SKaiNET-developers/SKaiNET-transformers.git
 
 POM_LICENCE_NAME=MIT License
 POM_LICENCE_URL=https://mit-license.org/

diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
@@ -3,7 +3,7 @@ skainet = "0.15.3"
 agp = "8.13.0"
 jacksonDatabind = "2.21.1"
 jsonSchemaValidator = "3.0.0"
-jsonSchemaValidatorVersion = "0.5.3"
+jsonSchemaValidatorVersion = "0.5.4"
 junit = "4.13.2"
 junitJupiter = "6.0.3"
 kotlin = "2.3.0"
@@ -12,7 +12,7 @@ kotlinBrowser = "0.5.0"
 android-minSdk = "24"
 android-compileSdk = "36"
 kotlinxSerializationJson = "1.9.0"
-ktorClientCore = "3.1.1"
+ktorClientCore = "3.4.1"
 ktorClientPlugins = "3.1.1"
 logbackClassic = "1.5.32"
 kover = "0.9.7"

diff --git a/llm-agent/gradle.properties b/llm-agent/gradle.properties
@@ -1,4 +1,4 @@
-POM_ARTIFACT_ID=skainet-kllama-agents
+POM_ARTIFACT_ID=skainet-transformers-agent
 POM_NAME=Agentic API for kllama
 
 kotlin.mpp.applyDefaultHierarchyTemplate=false
diff --git a/llm-apps/kllama-cli/gradle.properties b/llm-apps/kllama-cli/gradle.properties
@@ -1,4 +1,4 @@
-POM_ARTIFACT_ID=skainet-apps-kllama-cli
+POM_ARTIFACT_ID=skainet-transformers-apps-kllama-cli
 POM_NAME=skainet neural network scripting API
 
 kotlin.mpp.applyDefaultHierarchyTemplate=false
diff --git a/llm-bom/gradle.properties b/llm-bom/gradle.properties
@@ -1,2 +1,2 @@
-POM_ARTIFACT_ID=skainet-llm-bom
-POM_NAME=SKaiNET-LLM Bill of Materials
+POM_ARTIFACT_ID=skainet-transformers-bom
+POM_NAME=SKaiNET-transformers Bill of Materials
diff --git a/llm-core/gradle.properties b/llm-core/gradle.properties
@@ -1,4 +1,4 @@
-POM_ARTIFACT_ID=skainet-llm
-POM_NAME=skainet llm core
+POM_ARTIFACT_ID=skainet-transformers-core
+POM_NAME=skainet transformers core
 
 kotlin.mpp.applyDefaultHierarchyTemplate=false
diff --git a/llm-inference/bert/gradle.properties b/llm-inference/bert/gradle.properties
@@ -1,2 +1,2 @@
-POM_ARTIFACT_ID=skainet-model-bert
+POM_ARTIFACT_ID=skainet-transformers-inference-bert
 POM_NAME=skainet bert embeddings loader
diff --git a/llm-inference/gemma/gradle.properties b/llm-inference/gemma/gradle.properties
@@ -1,2 +1,2 @@
-POM_ARTIFACT_ID=skainet-model-gemma
+POM_ARTIFACT_ID=skainet-transformers-inference-gemma
 POM_NAME=skainet gemma model loader
diff --git a/llm-inference/gemma/src/commonMain/kotlin/sk/ainet/models/gemma/ActivationSparsity.kt b/llm-inference/gemma/src/commonMain/kotlin/sk/ainet/models/gemma/ActivationSparsity.kt
@@ -0,0 +1,93 @@
+package sk.ainet.models.gemma
+
+import kotlin.math.abs
+import kotlin.math.ln
+import kotlin.math.sqrt
+
+/**
+ * Activation sparsity via a Gaussian-estimated magnitude cutoff ("Gaussian top-k").
+ *
+ * Used for Gemma 3n E4B to zero out a fraction of FFN activations, with the
+ * aim of reducing effective computation.
+ *
+ * Activations are modelled as N(mean, std). The standard-normal quantile at
+ * (1 + sparsityRate) / 2, scaled by std, gives a two-tailed cutoff; entries
+ * with |x - mean| below that cutoff are zeroed.
+ */
+public object ActivationSparsity {
+
+    /**
+     * Zeroes, in place, every entry of [values] lying close to the array mean.
+     *
+     * If the data is roughly Gaussian, about (1 - sparsityRate) of the entries
+     * survive; the count is an estimate, not an exact top-k selection.
+     *
+     * NOTE(review): the upstream Gemma 3n code appears to use a one-sided cutoff
+     * followed by ReLU(x - cutoff) rather than two-tailed zeroing - TODO confirm.
+     *
+     * @param values Activation values; mutated and returned (no copy is made).
+     * @param sparsityRate Target fraction to zero. `<= 0` is a no-op, `>= 1` zeroes all.
+     * @return The same [values] instance, left untouched when std is below 1e-10.
+     */
+    public fun applyGaussianTopK(values: FloatArray, sparsityRate: Float): FloatArray {
+        // Degenerate rates short-circuit before any statistics are computed.
+        when {
+            values.isEmpty() || sparsityRate <= 0f -> return values
+            sparsityRate >= 1f -> return values.apply { fill(0f) }
+        }
+
+        // Population statistics, accumulated in Double and narrowed to Float.
+        val count = values.size
+        val mean = (values.sumOf { it.toDouble() } / count).toFloat()
+        val squaredDeviations = values.sumOf { x ->
+            val deviation = (x - mean).toDouble()
+            deviation * deviation
+        }
+        val std = sqrt(squaredDeviations / count).toFloat()
+
+        // A (near-)constant array has no meaningful spread; leave it as is.
+        if (std < 1e-10f) return values
+
+        // Two-tailed: P(|Z| < z) = sparsityRate  <=>  z = probit((1 + sparsityRate) / 2).
+        val z = inverseNormalCDF((1.0 + sparsityRate) / 2.0).toFloat()
+        val cutoff = z * std
+
+        // Entries inside the band become 0; entries outside keep their value.
+        values.forEachIndexed { index, x ->
+            if (abs(x - mean) < cutoff) values[index] = 0f
+        }
+
+        return values
+    }
+
+    /**
+     * Approximate inverse of the standard normal CDF (probit function), based on
+     * the Abramowitz & Stegun rational approximation.
+     *
+     * Absolute error is about 4.5e-4 for p in (0, 1). Inputs `p <= 0` and `p >= 1`
+     * map to negative and positive infinity respectively.
+     */
+    internal fun inverseNormalCDF(p: Double): Double = when {
+        p <= 0.0 -> Double.NEGATIVE_INFINITY
+        p >= 1.0 -> Double.POSITIVE_INFINITY
+        // Lower tail: evaluate on p and mirror the sign.
+        p < 0.5 -> -rationalApprox(sqrt(-2.0 * ln(p)))
+        // Upper tail: evaluate on the complementary probability.
+        else -> rationalApprox(sqrt(-2.0 * ln(1.0 - p)))
+    }
+
+    // Abramowitz & Stegun coefficients: C* form the numerator, D* the denominator.
+    private const val C0 = 2.515517
+    private const val C1 = 0.802853
+    private const val C2 = 0.010328
+    private const val D1 = 1.432788
+    private const val D2 = 0.189269
+    private const val D3 = 0.001308
+
+    /** Evaluates t - (C0 + C1 t + C2 t^2) / (1 + D1 t + D2 t^2 + D3 t^3). */
+    private fun rationalApprox(t: Double): Double {
+        val numerator = C0 + C1 * t + C2 * t * t
+        val denominator = 1.0 + D1 * t + D2 * t * t + D3 * t * t * t
+        return t - numerator / denominator
+    }
+}