Merge pull request #437 from SKaiNET-developers/feature/chuncked-dequant-support

michalharakal · web-flow · commit d0e9e9566111 · 2026-03-25T19:24:27.000+01:00
Feature/chuncked dequant support
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
@@ -5,7 +5,7 @@ jsonSchemaValidator = "3.0.1"
 jsonSchemaValidatorVersion = "0.5.4"
 junit = "4.13.2"
 junitJupiter = "6.0.3"
-kotlin = "2.3.0"
+kotlin = "2.3.10"
 kotlinxCoroutines = "1.10.2"
 kotlinBrowser = "0.5.0"
 android-minSdk = "24"
diff --git a/kotlin-js-store/yarn.lock b/kotlin-js-store/yarn.lock
@@ -1095,6 +1095,11 @@ is-number@^7.0.0:
   resolved "https://registry.yarnpkg.com/is-number/-/is-number-7.0.0.tgz#7535345b896734d5f80c4d06c50955527a14f12b"
   integrity sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==
 
+is-path-inside@^3.0.3:
+  version "3.0.3"
+  resolved "https://registry.yarnpkg.com/is-path-inside/-/is-path-inside-3.0.3.tgz#d231362e53a07ff2b0e0ea7fed049161ffd16283"
+  integrity sha512-Fd4gABb+ycGAmKou8eMftCupSir5lRxqf4aD/vd0cD2qc4HL07OjCeuHMr8Ro4CoMaeCKDB0/ECBOVWjTwUvPQ==
+
 is-plain-obj@^2.1.0:
   version "2.1.0"
   resolved "https://registry.yarnpkg.com/is-plain-obj/-/is-plain-obj-2.1.0.tgz#45e42e37fccf1f40da8e5f76ee21515840c09287"
@@ -1361,10 +1366,10 @@ mkdirp@^0.5.5:
   dependencies:
     minimist "^1.2.6"
 
-mocha@11.7.2:
-  version "11.7.2"
-  resolved "https://registry.yarnpkg.com/mocha/-/mocha-11.7.2.tgz#3c0079fe5cc2f8ea86d99124debcc42bb1ab22b5"
-  integrity sha512-lkqVJPmqqG/w5jmmFtiRvtA2jkDyNVUcefFJKb2uyX4dekk8Okgqop3cgbFiaIvj8uCRJVTP5x9dfxGyXm2jvQ==
+mocha@11.7.5:
+  version "11.7.5"
+  resolved "https://registry.yarnpkg.com/mocha/-/mocha-11.7.5.tgz#58f5bbfa5e0211ce7e5ee6128107cefc2515a627"
+  integrity sha512-mTT6RgopEYABzXWFx+GcJ+ZQ32kp4fMf0xvpZIIfSq9Z8lC/++MtcCnQ9t5FP2veYEP95FIYSvW+U9fV4xrlig==
   dependencies:
     browser-stdout "^1.3.1"
     chokidar "^4.0.1"
@@ -1374,6 +1379,7 @@ mocha@11.7.2:
     find-up "^5.0.0"
     glob "^10.4.5"
     he "^1.2.0"
+    is-path-inside "^3.0.3"
     js-yaml "^4.1.0"
     log-symbols "^4.1.0"
     minimatch "^9.0.5"
diff --git a/skainet-data/skainet-data-simple/src/jvmMain/kotlin/sk/ainet/data/cifar10/CIFAR10LoaderJvm.kt b/skainet-data/skainet-data-simple/src/jvmMain/kotlin/sk/ainet/data/cifar10/CIFAR10LoaderJvm.kt
@@ -81,9 +81,9 @@ public class CIFAR10LoaderJvm(config: CIFAR10LoaderConfig) : CIFAR10LoaderCommon
 
             // Configure timeout for large files (CIFAR-10 is ~170MB)
             install(HttpTimeout) {
-                requestTimeoutMillis = 300000 // 5 minutes
+                requestTimeoutMillis = 600000 // 10 minutes
                 connectTimeoutMillis = 60000 // 60 seconds
-                socketTimeoutMillis = 300000 // 5 minutes
+                socketTimeoutMillis = 600000 // 10 minutes
             }
         }
 
diff --git a/skainet-io/skainet-io-gguf/src/commonMain/kotlin/sk/ainet/io/gguf/dequant/DequantOps.kt b/skainet-io/skainet-io-gguf/src/commonMain/kotlin/sk/ainet/io/gguf/dequant/DequantOps.kt
@@ -247,6 +247,33 @@ public object DequantOps {
         }
     }
 
+    /**
+     * Returns (bytesPerBlock, elemsPerBlock) for a given quantization type.
+     * Useful for chunked dequantization on single-threaded platforms (WASM).
+     *
+     * The first component is the number of bytes one encoded block occupies; the
+     * second is the number of elements stored in that block. The unquantized types
+     * (F16, BF16, F32) are reported with one element per block, i.e. each element
+     * is treated as its own block.
+     *
+     * @param type quantization type to look up.
+     * @return pair of (bytes per block, elements per block).
+     * @throws IllegalStateException if [type] has no entry in this table.
+     */
+    public fun blockInfoFor(type: GGMLQuantizationType): Pair<Int, Int> = when (type) {
+        GGMLQuantizationType.F16  -> Pair(2, 1)
+        GGMLQuantizationType.BF16 -> Pair(2, 1)
+        GGMLQuantizationType.F32  -> Pair(4, 1)
+        GGMLQuantizationType.Q4_0 -> Pair(18, 32)
+        GGMLQuantizationType.Q4_1 -> Pair(20, 32)
+        GGMLQuantizationType.Q5_0 -> Pair(22, 32)
+        GGMLQuantizationType.Q5_1 -> Pair(24, 32)
+        GGMLQuantizationType.Q8_0 -> Pair(34, 32)
+        GGMLQuantizationType.Q8_1 -> Pair(40, 32) // NOTE(review): 40 = 4 + 4 + 32; a layout with two 16-bit header fields would be 36 - confirm against the decoder
+        GGMLQuantizationType.IQ4_NL -> Pair(18, 32)
+        GGMLQuantizationType.IQ4_XS -> Pair(2 + 2 + QK_K / 2 + QK_K / 64, QK_K)
+        GGMLQuantizationType.Q2_K -> Pair(2 + 2 + QK_K / 16 + QK_K / 4, QK_K)
+        GGMLQuantizationType.Q3_K -> Pair(2 + QK_K / 4 + QK_K / 8 + 12, QK_K)
+        GGMLQuantizationType.Q4_K -> Pair(144, QK_K)
+        GGMLQuantizationType.Q5_K -> Pair(176, QK_K)
+        GGMLQuantizationType.Q6_K -> Pair(210, QK_K)
+        GGMLQuantizationType.Q8_K -> Pair(292, QK_K)
+        GGMLQuantizationType.TQ1_0 -> Pair(54, 256) // literal 256 here vs. QK_K above - presumably the same value; TODO confirm
+        GGMLQuantizationType.TQ2_0 -> Pair(66, 256)
+        else -> error("Block info for $type not available")
+    }
+
     // ========== ByteArray-based quantization implementations ==========
 
     @Suppress("UNUSED_PARAMETER")

Original file line number	Diff line number	Diff line change
`@@ -81,9 +81,9 @@ public class CIFAR10LoaderJvm(config: CIFAR10LoaderConfig) : CIFAR10LoaderCommon`
`81`	`81`
`82`	`82`	`// Configure timeout for large files (CIFAR-10 is ~170MB)`
`83`	`83`	`install(HttpTimeout) {`
`84`		`- requestTimeoutMillis = 300000 // 5 minutes`
	`84`	`+ requestTimeoutMillis = 600000 // 10 minutes`
`85`	`85`	`connectTimeoutMillis = 60000 // 60 seconds`
`86`		`- socketTimeoutMillis = 300000 // 5 minutes`
	`86`	`+ socketTimeoutMillis = 600000 // 10 minutes`
`87`	`87`	`}`
`88`	`88`	`}`
`89`	`89`