Move quantization config parsing into BaseModelConfig

YouNeedCryDear · YouNeedCryDear · commit ac5b4c047d07 · 2026-04-01T12:11:44.000-07:00
diff --git a/pkg/hfutil/modelconfig/interface.go b/pkg/hfutil/modelconfig/interface.go
@@ -67,6 +67,9 @@ type BaseModelConfig struct {
 	TorchDtype         string   `json:"torch_dtype"`
 	TransformerVersion string   `json:"transformers_version"`
 
+	// Quantization config (optional, shared across all model types)
+	QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"`
+
 	// Internal fields (not in JSON)
 	ConfigPath string `json:"-"`
 }
@@ -91,6 +94,14 @@ func (c *BaseModelConfig) GetTorchDtype() string {
 	return c.TorchDtype
 }
 
+// GetQuantizationType reports the configured quantization method, or "" when none is set.
+func (c *BaseModelConfig) GetQuantizationType() string {
+	if c.QuantizationConfig == nil {
+		return ""
+	}
+	return c.QuantizationConfig.QuantMethod
+}
+
 // Default implementation for HasVision - most models don't have vision capabilities
 func (c *BaseModelConfig) HasVision() bool {
 	return false
@@ -238,9 +249,6 @@ type GenericModelConfig struct {
 	IntermediateSize      int `json:"intermediate_size"`
 	MaxPositionEmbeddings int `json:"max_position_embeddings"`
 	VocabSize             int `json:"vocab_size"`
-
-	// Quantization config (optional)
-	QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"`
 }
 
 // GetParameterCount attempts to get parameter count from safetensors, falls back to estimation
@@ -278,13 +286,6 @@ func estimateGenericParams(hiddenSize, numLayers, intermediateSize, vocabSize in
 	return embeddingParams + totalLayerParams
 }
 
-func (c *GenericModelConfig) GetQuantizationType() string {
-	if c.QuantizationConfig != nil && c.QuantizationConfig.QuantMethod != "" {
-		return c.QuantizationConfig.QuantMethod
-	}
-	return ""
-}
-
 func (c *GenericModelConfig) GetContextLength() int {
 	return c.MaxPositionEmbeddings
 }
@@ -380,11 +381,6 @@ func (c *GenericDiffusionModelConfig) GetParameterCount() int64 {
 	return total
 }
 
-func (c *GenericDiffusionModelConfig) GetQuantizationType() string {
-	// Not supported. Doesn't seem to be standardized in HF.
-	return ""
-}
-
 func (c *GenericDiffusionModelConfig) GetContextLength() int {
 	if c.ConfigPath == "" {
 		return 0