
Commit 8adf249

Merge branch 'develop' into 0.0.1
2 parents: 4dfb771 + cd29620

18 files changed: 1759 additions & 15 deletions

README.md

Lines changed: 12 additions & 0 deletions
````diff
@@ -24,6 +24,18 @@ SKaiNET-transformers is a high-performance LLM (Large Language Model) applicatio
 - `llm-apps`: Ready-to-use CLI applications for model interaction and testing.
 - `llm-agent`: High-level agentic capabilities (in development).
 
+## Current Release
+
+The current release is **0.16.0**. To use SKaiNET-transformers in your project, add the following dependency:
+
+```kotlin
+dependencies {
+    implementation("sk.ainet.transformers:llm-core:0.16.0")
+}
+```
+
+Make sure to use a matching version of the SKaiNET engine (`sk.ainet.core:skainet-lang-core:0.16.0`).
+
 ## Getting Started
 
 ### Prerequisites
````
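Since the engine artifact must stay in lockstep with `llm-core`, a consumer build might centralize the version. A minimal sketch (the `skainetVersion` constant is illustrative, not part of the project's README):

```kotlin
// Consumer build.gradle.kts sketch: keep both artifacts on the same version.
val skainetVersion = "0.16.0"

dependencies {
    implementation("sk.ainet.transformers:llm-core:$skainetVersion")
    implementation("sk.ainet.core:skainet-lang-core:$skainetVersion")
}
```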

build.gradle.kts

Lines changed: 2 additions & 1 deletion
```diff
@@ -14,7 +14,8 @@ allprojects {
     group = "sk.ainet.llm"
 }
 
-// Require JDK 21+ but allow any newer version (produces Java 21 bytecode via --release / jvmTarget)
+// Require JDK 21+ for bytecode target; JDK 25 recommended (set via jenv local 25.0).
+// Produces Java 21 bytecode via --release / jvmTarget for backward compatibility.
 subprojects {
     require(JavaVersion.current() >= JavaVersion.VERSION_21) {
         "This project requires JDK 21+, but found ${JavaVersion.current()}"
```

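The updated comment pins the bytecode target while letting newer JDKs run the build. The hunk shows only the comment, not the wiring itself, so here is a hedged sketch of how a `--release` / `jvmTarget` setup is commonly expressed in the Gradle Kotlin DSL (not the project's actual configuration):

```kotlin
// Illustrative only: emit Java 21 bytecode even when building on JDK 25.
subprojects {
    tasks.withType<JavaCompile>().configureEach {
        options.release.set(21) // equivalent to javac --release 21
    }
    tasks.withType<org.jetbrains.kotlin.gradle.tasks.KotlinCompile>().configureEach {
        compilerOptions.jvmTarget.set(org.jetbrains.kotlin.gradle.dsl.JvmTarget.JVM_21)
    }
}
```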
gradle.properties

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,5 +1,5 @@
 GROUP=sk.ainet.transformers
-VERSION_NAME=0.4.0
+VERSION_NAME=0.16.0
 
 POM_DESCRIPTION=SKaiNET-transformers
```

Lines changed: 93 additions & 0 deletions
New file (93 lines):

```kotlin
package sk.ainet.models.gemma

import kotlin.math.abs
import kotlin.math.ln
import kotlin.math.sqrt

/**
 * Activation sparsity via Gaussian top-k selection.
 *
 * Used in Gemma 3n E4B to zero out a fraction of FFN activations,
 * reducing effective computation while preserving output quality.
 *
 * The threshold is computed assuming activations follow a Gaussian
 * distribution: the (1 - sparsityRate) quantile of N(mean, std) is used
 * as a cutoff, and values with |x - mean| below that threshold are zeroed.
 */
public object ActivationSparsity {

    /**
     * Apply Gaussian top-k sparsity to activation values.
     *
     * Keeps only the top (1 - sparsityRate) fraction of activations
     * by magnitude (relative to the distribution), zeroing the rest.
     *
     * @param values Activation values (modified in-place for efficiency)
     * @param sparsityRate Fraction of values to zero out (0.0 to 1.0). E.g., 0.95 keeps the top 5%.
     * @return The same array with sparse values zeroed
     */
    public fun applyGaussianTopK(values: FloatArray, sparsityRate: Float): FloatArray {
        if (sparsityRate <= 0f || values.isEmpty()) return values
        if (sparsityRate >= 1f) {
            values.fill(0f)
            return values
        }

        // Compute mean and std
        var sum = 0.0
        for (v in values) sum += v
        val mean = (sum / values.size).toFloat()

        var variance = 0.0
        for (v in values) {
            val d = (v - mean).toDouble()
            variance += d * d
        }
        val std = sqrt(variance / values.size).toFloat()

        if (std < 1e-10f) return values

        // Compute threshold: the z-score corresponding to keeping the top (1 - sparsityRate) by magnitude.
        // We want the quantile at (1 + sparsityRate) / 2 for a two-tailed cutoff.
        val z = inverseNormalCDF((1.0 + sparsityRate) / 2.0).toFloat()
        val threshold = z * std

        // Zero out values with |x - mean| < threshold
        for (i in values.indices) {
            if (abs(values[i] - mean) < threshold) {
                values[i] = 0f
            }
        }

        return values
    }

    /**
     * Approximation of the inverse normal CDF (probit function)
     * using the Abramowitz & Stegun rational approximation.
     *
     * Accurate to ~4.5e-4 for p in (0, 1).
     */
    internal fun inverseNormalCDF(p: Double): Double {
        if (p <= 0.0) return Double.NEGATIVE_INFINITY
        if (p >= 1.0) return Double.POSITIVE_INFINITY

        return if (p < 0.5) {
            -rationalApprox(sqrt(-2.0 * ln(p)))
        } else {
            rationalApprox(sqrt(-2.0 * ln(1.0 - p)))
        }
    }

    // Abramowitz & Stegun constants
    private const val C0 = 2.515517
    private const val C1 = 0.802853
    private const val C2 = 0.010328
    private const val D1 = 1.432788
    private const val D2 = 0.189269
    private const val D3 = 0.001308

    private fun rationalApprox(t: Double): Double {
        return t - (C0 + C1 * t + C2 * t * t) / (1.0 + D1 * t + D2 * t * t + D3 * t * t * t)
    }
}
```
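A quick worked check of the thresholding arithmetic: with `sparsityRate = 0.95f`, the two-tailed quantile is `inverseNormalCDF(0.975) ≈ 1.96`, so anything within about 1.96 standard deviations of the mean is zeroed. A minimal, self-contained usage sketch (the input values are made up for illustration):

```kotlin
import sk.ainet.models.gemma.ActivationSparsity

fun main() {
    // Mostly small activations plus one large outlier.
    val values = floatArrayOf(0.1f, -0.2f, 0.05f, 0.0f, -0.1f, 0.15f, -0.05f, 10f)

    // threshold = 1.96 * std; only values with |x - mean| >= threshold survive.
    // Here the outlier 10f is the only value beyond the cutoff.
    val sparse = ActivationSparsity.applyGaussianTopK(values, 0.95f)

    // The array is modified in place and also returned.
    println(sparse.joinToString()) // 0.0 everywhere except the last element
}
```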
Lines changed: 217 additions & 0 deletions
New file (217 lines):

```kotlin
package sk.ainet.models.gemma

import sk.ainet.context.ExecutionContext
import sk.ainet.lang.tensor.Tensor
import sk.ainet.lang.types.DType
import kotlin.reflect.KClass

/**
 * Global AltUp weights shared across all layers.
 *
 * @param projWeight Projects embedding into (numInputs - 1) additional states [hiddenSize, hiddenSize, numInputs - 1]
 * @param unembdProjWeight Projects back for output combination [hiddenSize, hiddenSize, numInputs - 1]
 */
public data class AltUpGlobalWeights<T : DType>(
    val projWeight: Tensor<T, Float>,
    val unembdProjWeight: Tensor<T, Float>
)

/**
 * Per-layer AltUp weights.
 *
 * @param predictCoef Prediction coefficients [numInputs, numInputs * numInputs]
 * @param correctCoef Correction coefficients [numInputs, numInputs]
 * @param correctScale Per-element scaling for correction [hiddenSize]
 * @param routerWeight Router projection [hiddenSize, numInputs]
 * @param routerNorm Router normalization [hiddenSize]
 */
public data class AltUpLayerWeights<T : DType>(
    val predictCoef: Tensor<T, Float>,
    val correctCoef: Tensor<T, Float>,
    val correctScale: Tensor<T, Float>,
    val routerWeight: Tensor<T, Float>,
    val routerNorm: Tensor<T, Float>
)

/**
 * AltUp (Alternating Updates) implementation for Gemma 3n E4B.
 *
 * AltUp maintains multiple parallel hidden states (E4B: 4) but only routes
 * the "active" state through expensive transformer layers. The other states
 * are cheaply predicted/corrected using learned per-layer coefficients.
 *
 * Architecture (from GGUF inspection):
 * - Global: altup_proj [2048,2048,3] creates 3 extra states from embedding
 * - Per-layer: router projects hidden to routing logits, predict_coef/correct_coef
 *   control state updates, correct_scale modulates corrections element-wise
 * - Global: altup_unembd_proj [2048,2048,3] recombines states for output
 *
 * @param ctx ExecutionContext for tensor operations
 * @param dtype Data type class
 * @param numInputs Number of parallel inputs (E4B: 4)
 * @param activeIdx Index of the active input (0)
 * @param hiddenSize Model hidden dimension
 * @param globalWeights Global projection/unprojection weights
 * @param layerWeights Per-layer AltUp weights
 */
public class AltUp<T : DType>(
    private val ctx: ExecutionContext,
    private val dtype: KClass<T>,
    private val numInputs: Int,
    public val activeIdx: Int,
    private val hiddenSize: Int,
    private val globalWeights: AltUpGlobalWeights<T>,
    private val layerWeights: List<AltUpLayerWeights<T>>
) {

    private val numExtra = numInputs - 1 // 3 for E4B

    /**
     * Initialize AltUp states from a single embedding vector.
     *
     * The active state (idx 0) is the embedding itself.
     * Additional states are created by projecting the embedding through altup_proj slices.
     *
     * @param embedding The token embedding [hiddenSize]
     * @return List of [numInputs] state tensors
     */
    public fun initialize(embedding: Tensor<T, Float>): List<Tensor<T, Float>> {
        val states = mutableListOf(embedding)

        // Project embedding into additional states using altup_proj [hiddenSize, hiddenSize, numExtra]
        val projBuf = globalWeights.projWeight.expectFloatBuffer()
        val embBuf = embedding.expectFloatBuffer()
        val h = hiddenSize

        for (k in 0 until numExtra) {
            val out = FloatArray(h)
            val offset = k * h * h
            for (i in 0 until h) {
                var sum = 0f
                for (j in 0 until h) {
                    sum += projBuf[offset + i * h + j] * embBuf[j]
                }
                out[i] = sum
            }
            states.add(ctx.fromFloatArray<T, Float>(embedding.shape, dtype, out))
        }

        return states
    }

    /**
     * Predict phase: generate predictions for all states using per-layer coefficients.
     *
     * Applies predict_coef to create weighted combinations of the current states.
     * (The router weights are carried in [AltUpLayerWeights] but not used in this
     * static-coefficient prediction path.)
     *
     * @param layerIdx Layer index to get per-layer weights
     * @param states Current parallel states
     * @return Predicted states
     */
    public fun predict(layerIdx: Int, states: List<Tensor<T, Float>>): List<Tensor<T, Float>> {
        val lw = layerWeights[layerIdx]
        val coeffBuf = lw.predictCoef.expectFloatBuffer()
        // predict_coef shape: [numInputs, numInputs * numInputs]
        // For each output state i, coefficients for combining input states
        val n = numInputs

        return List(n) { i ->
            var result = states[i]
            for (j in 0 until n) {
                if (i != j) {
                    // Use coefficient from the flattened matrix
                    val coeff = coeffBuf[i * n + j]
                    if (coeff != 0f) {
                        result = addScaled(result, states[j], coeff)
                    }
                }
            }
            result
        }
    }

    /**
     * Correct phase: update all states after the active state passes through the layer.
     *
     * innovation = layerOutput - predictions[activeIdx]
     * corrected[i] = predictions[i] + coeff[i, activeIdx] * (correctScale * innovation)
     *
     * @param layerIdx Layer index
     * @param layerOutput Output of the transformer layer for the active state
     * @param predictions Predicted states from [predict]
     * @return Corrected states
     */
    public fun correct(
        layerIdx: Int,
        layerOutput: Tensor<T, Float>,
        predictions: List<Tensor<T, Float>>
    ): List<Tensor<T, Float>> {
        val lw = layerWeights[layerIdx]
        val innovation = addScaled(layerOutput, predictions[activeIdx], -1f)

        // Apply element-wise scale to innovation
        val scaleBuf = lw.correctScale.expectFloatBuffer()
        val innBuf = innovation.expectFloatBuffer()
        val scaledInnovation = FloatArray(innBuf.size) { innBuf[it] * scaleBuf[it % scaleBuf.size] }
        val scaledInnovationTensor = ctx.fromFloatArray<T, Float>(innovation.shape, dtype, scaledInnovation)

        val coeffBuf = lw.correctCoef.expectFloatBuffer()
        val n = numInputs

        return List(n) { i ->
            if (i == activeIdx) {
                layerOutput
            } else {
                val coeff = coeffBuf[i * n + activeIdx]
                addScaled(predictions[i], scaledInnovationTensor, coeff)
            }
        }
    }

    /**
     * Finalize: combine all states into a single output using altup_unembd_proj.
     *
     * output = states[activeIdx] + sum over k of (unembd_proj[k] @ states[k+1])
     *
     * @param states Final parallel states after all layers
     * @return Combined output tensor
     */
    public fun finalize(states: List<Tensor<T, Float>>): Tensor<T, Float> {
        val unprojBuf = globalWeights.unembdProjWeight.expectFloatBuffer()
        val h = hiddenSize
        val result = states[activeIdx].expectFloatBuffer().copyOf()

        // Add projected extra states
        for (k in 0 until numExtra) {
            val stateBuf = states[k + 1].expectFloatBuffer()
            val offset = k * h * h
            for (i in 0 until h) {
                var sum = 0f
                for (j in 0 until h) {
                    sum += unprojBuf[offset + i * h + j] * stateBuf[j]
                }
                result[i] += sum
            }
        }

        return ctx.fromFloatArray<T, Float>(states[activeIdx].shape, dtype, result)
    }

    private fun addScaled(a: Tensor<T, Float>, b: Tensor<T, Float>, bScale: Float): Tensor<T, Float> {
        val aBuf = a.expectFloatBuffer()
        val bBuf = b.expectFloatBuffer()
        val out = FloatArray(aBuf.size) { aBuf[it] + bScale * bBuf[it] }
        return ctx.fromFloatArray<T, Float>(a.shape, dtype, out)
    }

    private fun Tensor<T, Float>.expectFloatBuffer(): FloatArray {
        val data = this.data
        if (data is sk.ainet.lang.tensor.data.FloatArrayTensorData<*>) return data.buffer
        return data.copyToFloatArray()
    }
}
```
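To show how the three phases compose per token, here is a self-contained, algorithm-level sketch of the AltUp control flow. Plain `FloatArray`s stand in for the library's `Tensor` type, and `runTransformerLayer` is a hypothetical placeholder, not a SKaiNET API:

```kotlin
// Illustrative sketch: only the active state pays for the full layer;
// the others are updated via the cheap predict/correct coefficients.
fun altUpForward(
    embedding: FloatArray,                                        // [hiddenSize]
    numLayers: Int,
    initialize: (FloatArray) -> List<FloatArray>,                 // mirrors AltUp.initialize
    predict: (Int, List<FloatArray>) -> List<FloatArray>,         // mirrors AltUp.predict
    correct: (Int, FloatArray, List<FloatArray>) -> List<FloatArray>, // mirrors AltUp.correct
    finalize: (List<FloatArray>) -> FloatArray,                   // mirrors AltUp.finalize
    runTransformerLayer: (Int, FloatArray) -> FloatArray,         // placeholder for the real layer
    activeIdx: Int = 0
): FloatArray {
    // 1. Fan the embedding out into numInputs parallel states.
    var states = initialize(embedding)

    for (layer in 0 until numLayers) {
        // 2. Cheaply predict all states from learned coefficients.
        val predictions = predict(layer, states)
        // 3. Only the active state goes through the expensive layer.
        val layerOutput = runTransformerLayer(layer, predictions[activeIdx])
        // 4. Propagate the innovation (layerOutput - prediction) to the other states.
        states = correct(layer, layerOutput, predictions)
    }

    // 5. Recombine all states into one output vector via the unembed projection.
    return finalize(states)
}
```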
