@@ -93,6 +93,12 @@ export type SafetensorsParseFromRepo =
9393 header : SafetensorsFileHeader ;
9494 parameterCount ?: Partial < Record < Dtype , number > > ;
9595 parameterTotal ?: number ;
96+ /**
97+ * For Mixture-of-Experts models: breakdown of routed vs. always-active params,
98+ * computed when `computeParametersCount: true` and the repo's `config.json`
99+ * exposes MoE fields. Undefined for dense models.
100+ */
101+ moe ?: MoeInfo ;
96102 filepaths : string [ ] ;
97103 }
98104 | {
@@ -101,6 +107,12 @@ export type SafetensorsParseFromRepo =
101107 headers : SafetensorsShardedHeaders ;
102108 parameterCount ?: Partial < Record < Dtype , number > > ;
103109 parameterTotal ?: number ;
110+ /**
111+ * For Mixture-of-Experts models: breakdown of routed vs. always-active params,
112+ * computed when `computeParametersCount: true` and the repo's `config.json`
113+ * exposes MoE fields. Undefined for dense models.
114+ */
115+ moe ?: MoeInfo ;
104116 filepaths : string [ ] ;
105117 } ;
106118
@@ -323,6 +335,7 @@ export async function parseSafetensorsMetadata(
323335 parameterCount : computeNumOfParamsByDtypeSingleFile ( header , quantConfig ) ,
324336 /// shortcut: get param count directly from metadata
325337 parameterTotal : parseTotalParameters ( header . __metadata__ ?. total_parameters ) ,
338+ moe : computeMoeInfoFromHeaders ( [ header ] , modelConfig ) ,
326339 }
327340 : undefined ;
328341 return {
@@ -345,6 +358,7 @@ export async function parseSafetensorsMetadata(
345358 parameterCount : computeNumOfParamsByDtypeSharded ( shardedMap , quantConfig ) ,
346359 /// shortcut: get param count directly from metadata
347360 parameterTotal : parseTotalParameters ( index . metadata ?. total_parameters ) ,
361+ moe : computeMoeInfoFromHeaders ( Object . values ( shardedMap ) , modelConfig ) ,
348362 }
349363 : undefined ;
350364 return {
@@ -370,9 +384,45 @@ export interface QuantizationConfig {
370384 config_groups ?: Record < string , { weights ?: { num_bits ?: number } } > ;
371385}
372386
373- export interface ModelConfig {
/**
 * Mixture-of-Experts hyper-parameters as they may appear in a model's `config.json`.
 *
 * Model families spell the same concept differently, so every known spelling is
 * listed here and every field is optional.
 */
interface MoeConfigFields {
	// --- number of routed experts selected per token (top-k) ---

	/** Usual spelling (Mixtral, Qwen2/3-MoE, Llama4, GPT-OSS, …). */
	num_experts_per_tok?: number;
	/** Alternative spelling found in some checkpoints. */
	num_experts_per_token?: number;

	// --- total number of routed experts ---

	num_local_experts?: number;
	num_experts?: number;
	/** DeepSeek family spelling. */
	n_routed_experts?: number;

	// --- number of shared (always-on) experts ---

	/** DeepSeek family spelling. */
	n_shared_experts?: number;
	/** Multi-modal Ernie 4.5 spelling. */
	moe_num_shared_experts?: number;
}
400+
/**
 * Subset of a model's `config.json` read by this module.
 *
 * Quantization settings and MoE fields may sit at the top level or be nested
 * under `text_config`, so both locations are typed.
 */
export interface ModelConfig extends MoeConfigFields {
	/** Top-level quantization settings, when present. */
	quantization_config?: QuantizationConfig;
	/** Nested text-model config; may carry its own quantization settings and MoE fields. */
	text_config?: MoeConfigFields & { quantization_config?: QuantizationConfig };
}
405+
/**
 * Parameter breakdown for a Mixture-of-Experts (MoE) model.
 *
 * Only `topK` out of `numExperts` routed experts run for a given token, so the
 * parameter count actually exercised per token ("active") is smaller than the
 * total stored in the checkpoint:
 *
 *     active = alwaysActive + topK * perExpert
 *
 * `parseSafetensorsMetadata` fills this in when the model's `config.json` exposes
 * MoE fields and the tensor names follow a supported expert layout.
 */
export interface MoeInfo {
	/** Number of routed experts, as read from the config. */
	numExperts: number;
	/** Number of routed experts selected per token, as read from the config. */
	topK: number;
	/** Mean parameter count of one routed expert: (sum over all routed expert tensors) / `numExperts`. */
	perExpert: number;
	/** Parameters used for every token: embeddings, attention, norms, lm_head, router, shared experts, … */
	alwaysActive: number;
	/** `alwaysActive + topK * perExpert`. */
	active: number;
	/** Whether the model has a dense shared-expert MLP next to the routed experts (Deepseek, Qwen-MoE, Command-A, …). */
	hasSharedExpert: boolean;
}
377427
378428/**
@@ -473,6 +523,76 @@ function getQuantizationMultiplier(tensorName: string, dtype: Dtype, quantConfig
473523 }
474524}
475525
/**
 * Extract the MoE routing hyper-parameters from a model config.
 *
 * Each value is looked up first on the top-level config and then on `text_config`;
 * the first non-nullish spelling found wins.
 *
 * @param config Parsed `config.json`, or `null` when none is available.
 * @returns `{ topK, numExperts }` when both values are positive integers and
 *   `topK <= numExperts`; `undefined` otherwise (no config, dense model, or
 *   malformed values).
 */
function getMoeConfig(config: ModelConfig | null): { topK: number; numExperts: number } | undefined {
	if (!config) return undefined;

	const sources: MoeConfigFields[] = [config, config.text_config ?? {}];
	let topK: number | undefined;
	let numExperts: number | undefined;
	for (const src of sources) {
		topK = topK ?? src.num_experts_per_tok ?? src.num_experts_per_token;
		numExperts = numExperts ?? src.num_local_experts ?? src.num_experts ?? src.n_routed_experts;
	}

	// `config.json` is unvalidated JSON: the static types are only a hint. Require real
	// positive integers so that strings (which would be compared lexicographically by
	// `>`), fractions, NaN or Infinity can never leak into the parameter arithmetic.
	const isPositiveInteger = (value: unknown): value is number =>
		typeof value === "number" && Number.isInteger(value) && value > 0;

	if (!isPositiveInteger(topK) || !isPositiveInteger(numExperts) || topK > numExperts) {
		return undefined;
	}
	return { topK, numExperts };
}
538+
/** One tensor per expert: `…experts.{int}.…` or `…experts.expert_{int}.…`. */
const PER_EXPERT_TENSOR_NAME = /\.experts\.(?:expert_)?\d+\./;
/** All experts stacked in one tensor: name ends in `…experts.<name>`, optionally followed by `.weight` / `.bias`. */
const STACKED_EXPERT_TENSOR_NAME = /\.experts\.[A-Za-z_][\w]*(?:\.(?:weight|bias))?$/;

/**
 * Tell whether a tensor holds weights of a *routed* expert, i.e. one that is
 * selected per token by the gate. Any tensor whose name contains `shared_expert`
 * is never considered routed.
 *
 * Supported naming layouts:
 *  - one tensor per expert, numeric index: `…experts.{int}.…`
 *    (Mixtral, Phi-MoE, OlMoE, Qwen-MoE, …)
 *  - one tensor per expert, prefixed index: `…experts.expert_{int}.…`
 *    (Switch Transformers)
 *  - all experts stacked along the first axis: `…experts.<name>` with
 *    `shape[0] === numExperts`
 *    (GPT-OSS, modern Mixtral/Qwen/Deepseek in-memory format, GraniteMoE, JetMoE)
 *
 * @param name Tensor name as stored in the safetensors header.
 * @param info Header entry for the tensor; only `shape` is read.
 * @param numExperts Number of routed experts declared by the model config.
 */
function isRoutedExpertTensor(name: string, info: TensorInfo, numExperts: number): boolean {
	if (name.includes("shared_expert")) {
		return false;
	}
	if (PER_EXPERT_TENSOR_NAME.test(name)) {
		return true;
	}
	// A stacked name alone is not enough: the leading dimension must equal the expert count.
	return STACKED_EXPERT_TENSOR_NAME.test(name) && info.shape[0] === numExperts;
}
555+
/**
 * Build the MoE parameter breakdown from safetensors headers.
 *
 * Walks every tensor entry of every header, sums element counts (product of the
 * shape dimensions), and splits that sum into routed-expert parameters and
 * everything else.
 *
 * @param headers One header for a single-file checkpoint, or one per shard.
 * @param config Parsed `config.json`, or `null` when none is available.
 * @returns The breakdown, or `undefined` when the config carries no usable MoE
 *   fields or when no tensor is recognized as a routed expert.
 */
function computeMoeInfoFromHeaders(
	headers: Iterable<SafetensorsFileHeader>,
	config: ModelConfig | null,
): MoeInfo | undefined {
	const routing = getMoeConfig(config);
	if (routing === undefined) {
		return undefined;
	}
	const { numExperts, topK } = routing;

	let allParams = 0;
	let routedParams = 0;
	let sawSharedExpert = false;

	for (const fileHeader of headers) {
		for (const [tensorName, entry] of Object.entries(fileHeader)) {
			// `__metadata__` is the only non-tensor key of a header.
			if (tensorName === "__metadata__") {
				continue;
			}
			const tensor = entry as TensorInfo;

			// Tensors with an empty shape are left out of the count.
			if (tensor.shape.length === 0) {
				continue;
			}

			let elementCount = 1;
			for (const dim of tensor.shape) {
				elementCount *= dim;
			}
			if (!Number.isFinite(elementCount)) {
				continue;
			}

			allParams += elementCount;
			if (isRoutedExpertTensor(tensorName, tensor, numExperts)) {
				routedParams += elementCount;
			} else if (tensorName.includes("shared_expert")) {
				sawSharedExpert = true;
			}
		}
	}

	// The config claims MoE but no tensor matches a known expert layout: report nothing
	// rather than a misleading breakdown.
	if (routedParams === 0) {
		return undefined;
	}

	const perExpert = routedParams / numExperts;
	const alwaysActive = allParams - routedParams;
	const active = alwaysActive + topK * perExpert;

	return {
		numExperts,
		topK,
		perExpert,
		alwaysActive,
		active,
		hasSharedExpert: sawSharedExpert,
	};
}
595+
476596function computeNumOfParamsByDtypeSingleFile (
477597 header : SafetensorsFileHeader ,
478598 quantConfig ?: QuantizationConfig ,
0 commit comments