Skip to content

Commit 8415003

Browse files
Merge pull request #1 from sk-ai-net/gguf-reader
Add GGUFReader
2 parents fbf251f + c163fe1 commit 8415003

13 files changed

Lines changed: 805 additions & 3 deletions

File tree

.gitignore

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
local.properties
1+
local.properties
22
.gradle
33
build/
44
!gradle/wrapper/gradle-wrapper.jar
@@ -20,4 +20,4 @@ out/
2020
.vscode/
2121

2222
### Mac OS ###
23-
.DS_Store
23+
.DS_Store

build.gradle.kts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// Root build script: plugins are only put on the build classpath here
// (`apply false`); each sub-module applies the ones it needs.
plugins {
    alias(libs.plugins.androidLibrary) apply false
    alias(libs.plugins.kotlinMultiplatform) apply false
    alias(libs.plugins.jetbrainsKotlinJvm) apply false
}

gguf/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/build

gguf/build.gradle.kts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// Build script of the `gguf` module: a plain JVM library written in Kotlin.
plugins {
    id("java-library")
    alias(libs.plugins.jetbrainsKotlinJvm)
}

// Java and Kotlin compilation are pinned to the same bytecode level (11);
// keep the two blocks below in sync when changing it.
java {
    sourceCompatibility = JavaVersion.VERSION_11
    targetCompatibility = JavaVersion.VERSION_11
}

kotlin {
    compilerOptions {
        jvmTarget = org.jetbrains.kotlin.gradle.dsl.JvmTarget.JVM_11
    }
}

dependencies {
    testImplementation(libs.testng)
}
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
package sk.ai.net.gguf
2+
3+
/**
4+
* Kotlin port of the GGUF reader constants from the Python module "gguf-py/gguf/constants.py"
5+
* of github repo "https://github.com/ggerganov/llama.cpp"
6+
*/
7+
8+
// TODO: port the remaining definitions from constants.py
9+
10+
/**
 * Magic number of the format: the ASCII bytes `G`, `G`, `U`, `F`
 * (0x47, 0x47, 0x55, 0x46) read as a little-endian unsigned 32-bit integer.
 */
const val GGUF_MAGIC: UInt = 0x46554747u

/** Version number taken over from the upstream `constants.py`. */
const val GGUF_VERSION: Int = 3

/**
 * Default alignment taken over from the upstream `constants.py`.
 * NOTE(review): presumably expressed in bytes — confirm against the reader.
 */
const val GGUF_DEFAULT_ALIGNMENT: Int = 32
13+
14+
/**
 * Tensor data / quantization types, ported from the upstream Python
 * `GGMLQuantizationType` enum.
 *
 * The numeric ids are not contiguous: 4, 5 and 31..33 have no entry in this
 * enum, so [fromValue] returns `null` for them.
 *
 * @property value numeric id associated with the type
 */
enum class GGMLQuantizationType(val value: Int) {
    F32(0),
    F16(1),
    Q4_0(2),
    Q4_1(3),
    Q5_0(6),
    Q5_1(7),
    Q8_0(8),
    Q8_1(9),
    Q2_K(10),
    Q3_K(11),
    Q4_K(12),
    Q5_K(13),
    Q6_K(14),
    Q8_K(15),
    IQ2_XXS(16),
    IQ2_XS(17),
    IQ3_XXS(18),
    IQ1_S(19),
    IQ4_NL(20),
    IQ3_S(21),
    IQ2_S(22),
    IQ4_XS(23),
    I8(24),
    I16(25),
    I32(26),
    I64(27),
    F64(28),
    IQ1_M(29),
    BF16(30),
    TQ1_0(34),
    TQ2_0(35);

    companion object {
        // Built once: `values()` copies the backing array on every call, so
        // scanning it per lookup would allocate and search linearly each time.
        private val byValue: Map<Int, GGMLQuantizationType> = values().associateBy { it.value }

        /**
         * Looks up a type by its numeric id.
         *
         * @param value numeric id to resolve
         * @return the matching entry, or `null` when no entry has that id
         */
        fun fromValue(value: Int): GGMLQuantizationType? = byValue[value]
    }
}
53+
54+
/** Block size constant used by the 256-element block layouts in [GGML_QUANT_SIZES]. */
const val QK_K = 256

/**
 * Size table per quantization type, ported from the upstream Python
 * `GGML_QUANT_SIZES` dictionary.
 *
 * Each value is a pair; following the upstream table, `first` is the block
 * size and `second` is the type size. The second component is written as a
 * sum so the individual parts of the upstream formula stay recognisable.
 *
 * Note on precedence: `+`, `*` and `/` bind tighter than the infix `to`,
 * so `32 to 2 + 16` is `Pair(32, 18)`.
 */
val GGML_QUANT_SIZES: Map<GGMLQuantizationType, Pair<Int, Int>> = mapOf(
    GGMLQuantizationType.F32 to (1 to 4),
    GGMLQuantizationType.F16 to (1 to 2),
    GGMLQuantizationType.Q4_0 to (32 to 2 + 16),
    GGMLQuantizationType.Q4_1 to (32 to 2 + 2 + 16),
    GGMLQuantizationType.Q5_0 to (32 to 2 + 4 + 16),
    GGMLQuantizationType.Q5_1 to (32 to 2 + 2 + 4 + 16),
    GGMLQuantizationType.Q8_0 to (32 to 2 + 32),
    GGMLQuantizationType.Q8_1 to (32 to 4 + 4 + 32),
    GGMLQuantizationType.Q2_K to (256 to 2 + 2 + QK_K / 16 + QK_K / 4),
    GGMLQuantizationType.Q3_K to (256 to 2 + QK_K / 4 + QK_K / 8 + 12),
    GGMLQuantizationType.Q4_K to (256 to 2 + 2 + QK_K / 2 + 12),
    GGMLQuantizationType.Q5_K to (256 to 2 + 2 + QK_K / 2 + QK_K / 8 + 12),
    GGMLQuantizationType.Q6_K to (256 to 2 + QK_K / 2 + QK_K / 4 + QK_K / 16),
    GGMLQuantizationType.Q8_K to (256 to 4 + QK_K + QK_K / 8),
    GGMLQuantizationType.IQ2_XXS to (256 to 2 + QK_K / 4),
    GGMLQuantizationType.IQ2_XS to (256 to 2 + QK_K / 4 + QK_K / 32),
    GGMLQuantizationType.IQ3_XXS to (256 to 2 + QK_K / 4 + QK_K / 8),
    GGMLQuantizationType.IQ1_S to (256 to 2 + QK_K / 8 + QK_K / 16),
    GGMLQuantizationType.IQ4_NL to (32 to 2 + 16),
    GGMLQuantizationType.IQ3_S to (256 to 2 + QK_K / 4 + QK_K / 8 + QK_K / 32 + 4),
    GGMLQuantizationType.IQ2_S to (256 to 2 + QK_K / 4 + QK_K / 16),
    GGMLQuantizationType.IQ4_XS to (256 to 2 + 2 + QK_K / 2 + QK_K / 64),
    GGMLQuantizationType.I8 to (1 to 1),
    GGMLQuantizationType.I16 to (1 to 2),
    GGMLQuantizationType.I32 to (1 to 4),
    GGMLQuantizationType.I64 to (1 to 8),
    GGMLQuantizationType.F64 to (1 to 8),
    GGMLQuantizationType.IQ1_M to (256 to QK_K / 8 + QK_K / 16 + QK_K / 32),
    GGMLQuantizationType.BF16 to (1 to 2),
    GGMLQuantizationType.TQ1_0 to (256 to 2 + 4 * 13),
    GGMLQuantizationType.TQ2_0 to (256 to 2 + 64),
)
91+
92+
/**
 * Value type tags, ported from the upstream Python `GGUFValueType` enum.
 * The ids run contiguously from 0 to 12.
 *
 * @property value numeric id associated with the value type
 */
enum class GGUFValueType(val value: Int) {
    UINT8(0),
    INT8(1),
    UINT16(2),
    INT16(3),
    UINT32(4),
    INT32(5),
    FLOAT32(6),
    BOOL(7),
    STRING(8),
    ARRAY(9),
    UINT64(10),
    INT64(11),
    FLOAT64(12);

    companion object {
        // Built once so lookups do not re-scan (and re-copy) the entry array.
        private val byValue: Map<Int, GGUFValueType> = values().associateBy { it.value }

        /**
         * Looks up a value type by its numeric id, mirroring
         * `GGMLQuantizationType.fromValue`.
         *
         * @param value numeric id to resolve
         * @return the matching entry, or `null` when no entry has that id
         */
        fun fromValue(value: Int): GGUFValueType? = byValue[value]
    }
}

0 commit comments

Comments
 (0)