Skip to content

Commit 8bf19bc

Browse files
committed
feat: support NVFP4 quants
1 parent e0986fa commit 8bf19bc

File tree

3 files changed

+5
-2
lines changed

3 files changed

+5
-2
lines changed

src/gguf/types/GgufMetadataTypes.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,8 @@ export enum GgufFileType {
193193
MOSTLY_Q4_0_8_8 = 35, // deprecated
194194
MOSTLY_TQ1_0 = 36,
195195
MOSTLY_TQ2_0 = 37,
196-
MOSTLY_MXFP4_MOE = 38
196+
MOSTLY_MXFP4_MOE = 38,
197+
MOSTLY_NVFP4 = 39
197198
}
198199

199200

src/gguf/types/GgufTensorInfoTypes.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,5 +60,6 @@ export const enum GgmlType {
6060
IQ4_NL_4_4 = 36,
6161
IQ4_NL_4_8 = 37,
6262
IQ4_NL_8_8 = 38,
63-
MXFP4 = 39 // MXFP4 (1 block)
63+
MXFP4 = 39, // MXFP4 (1 block)
64+
NVFP4 = 40 // NVFP4 (4 blocks, E4M3 scale)
6465
}

src/gguf/utils/ggufQuantNames.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ export const ggufQuantNames = new Map<string, GgufFileType>([
44
["Q4_0", GgufFileType.MOSTLY_Q4_0],
55
["Q4_1", GgufFileType.MOSTLY_Q4_1],
66
["MXFP4", GgufFileType.MOSTLY_MXFP4_MOE],
7+
["NVFP4", GgufFileType.MOSTLY_NVFP4],
78
["Q5_0", GgufFileType.MOSTLY_Q5_0],
89
["Q5_1", GgufFileType.MOSTLY_Q5_1],
910
["IQ2_XXS", GgufFileType.MOSTLY_IQ2_XXS],

0 commit comments

Comments
 (0)