ModelTC
diff --git a/‎assets/wan_t2v/calib/samples.json‎
Lines changed: 6 additions & 0 deletions b/‎assets/wan_t2v/calib/samples.json‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎assets/wan_t2v/eval/samples.json‎
Lines changed: 6 additions & 0 deletions b/‎assets/wan_t2v/eval/samples.json‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎configs/quantization/video_gen/wan_t2v/awq_w_a.yaml‎
Lines changed: 50 additions & 0 deletions b/‎configs/quantization/video_gen/wan_t2v/awq_w_a.yaml‎
Lines changed: 50 additions & 0 deletions
diff --git a/‎configs/quantization/video_gen/wan_t2v/rtn_w_a.yaml‎
Lines changed: 33 additions & 0 deletions b/‎configs/quantization/video_gen/wan_t2v/rtn_w_a.yaml‎
Lines changed: 33 additions & 0 deletions
diff --git a/‎configs/quantization/video_gen/wan_t2v/smoothquant_w_a.yaml‎
Lines changed: 46 additions & 0 deletions b/‎configs/quantization/video_gen/wan_t2v/smoothquant_w_a.yaml‎
Lines changed: 46 additions & 0 deletions
diff --git a/‎llmc/__main__.py‎
100644 → 100755
Lines changed: 4 additions & 2 deletions b/‎llmc/__main__.py‎
100644 → 100755
Lines changed: 4 additions & 2 deletions
diff --git a/‎llmc/compression/quantization/base_blockwise_quantization.py‎
Lines changed: 23 additions & 9 deletions b/‎llmc/compression/quantization/base_blockwise_quantization.py‎
Lines changed: 23 additions & 9 deletions
diff --git a/‎llmc/compression/quantization/dgq.py‎
100644 → 100755
Lines changed: 2 additions & 2 deletions b/‎llmc/compression/quantization/dgq.py‎
100644 → 100755
Lines changed: 2 additions & 2 deletions
diff --git a/‎llmc/compression/quantization/module_utils.py‎
Lines changed: 82 additions & 0 deletions b/‎llmc/compression/quantization/module_utils.py‎
Lines changed: 82 additions & 0 deletions
diff --git a/‎llmc/compression/quantization/spqr.py‎
100644 → 100755
Lines changed: 2 additions & 2 deletions b/‎llmc/compression/quantization/spqr.py‎
100644 → 100755
Lines changed: 2 additions & 2 deletions
@@ -0,0 +1,6 @@
+[
+    {
+        "prompt": "A cat walks on the grass, realistic",
+        "negative_prompt": "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"
+    }
+]
@@ -0,0 +1,6 @@
+[
+    {
+        "prompt": "A cat walks on the grass, realistic",
+        "negative_prompt": "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"
+    }
+]
@@ -0,0 +1,50 @@
+base:
+    seed: &seed 42
+model:
+    type: WanT2V
+    path: /path/to/wan_t2v
+    torch_dtype: auto
+calib:
+    name: custom_t2v
+    download: False
+    path: ../assets/wan_t2v/calib/
+    sample_steps: 20
+    bs: 1
+    target_height: 480
+    target_width: 832
+    num_frames: 81
+    guidance_scale: 5.0
+    seed: *seed
+eval:
+    eval_pos: [transformed, fake_quant]
+    type: video_gen
+    name: custom_t2v
+    download: False
+    path: ../assets/wan_t2v/calib/
+    bs: 1
+    target_height: 480
+    target_width: 832
+    num_frames: 81
+    guidance_scale: 5.0
+    output_video_path: ./output_videos_awq/
+quant:
+    video_gen:
+        method: Awq
+        weight:
+            bit: 6
+            symmetric: True
+            granularity: per_channel
+            group_size: -1
+        act:
+            bit: 6
+            symmetric: True
+            granularity: per_token
+        special:
+            trans: True
+            trans_version: v2
+            weight_clip: True
+            clip_sym: True
+save:
+    save_trans: False
+    save_fake: False
+    save_path: /path/to/save/
@@ -0,0 +1,33 @@
+base:
+    seed: &seed 42
+model:
+    type: WanT2V
+    path: /path/to/wan_t2v
+    torch_dtype: auto
+eval:
+    eval_pos: [transformed, fake_quant]
+    type: video_gen
+    name: custom_t2v
+    download: False
+    path: ../assets/wan_t2v/eval/
+    bs: 1
+    target_height: 480
+    target_width: 832
+    num_frames: 81
+    guidance_scale: 5.0
+    output_video_path: ./output_videos_rtn/
+quant:
+    video_gen:
+        method: RTN
+        weight:
+            bit: 6
+            symmetric: True
+            granularity: per_channel
+        act:
+            bit: 6
+            symmetric: True
+            granularity: per_token
+save:
+    save_trans: False
+    save_fake: False
+    save_path: /path/to/save/
@@ -0,0 +1,46 @@
+base:
+    seed: &seed 42
+model:
+    type: WanT2V
+    path: /path/to/wan_t2v
+    torch_dtype: auto
+calib:
+    name: custom_t2v
+    download: False
+    path: ../assets/wan_t2v/calib/
+    sample_steps: 20
+    bs: 1
+    target_height: 480
+    target_width: 832
+    num_frames: 81
+    guidance_scale: 5.0
+    seed: *seed
+eval:
+    eval_pos: [transformed, fake_quant]
+    type: video_gen
+    name: custom_t2v
+    download: False
+    path: ../assets/wan_t2v/calib/
+    bs: 1
+    target_height: 480
+    target_width: 832
+    num_frames: 81
+    guidance_scale: 5.0
+    output_video_path: ./output_videos_sq/
+quant:
+    video_gen:
+        method: SmoothQuant
+        weight:
+            bit: 6
+            symmetric: True
+            granularity: per_channel
+        act:
+            bit: 6
+            symmetric: True
+            granularity: per_token
+        special:
+            alpha: 0.7
+save:
+    save_trans: False
+    save_fake: False
+    save_path: /path/to/save/
@@ -30,12 +30,14 @@ def main(config):
     logger.info(f'model: {model}')
     logger.info(f'tokenizer: {model.get_tokenizer()}')
 
+    eval_list = get_eval_list(model, config)
+    eval_model(model, None, eval_list, eval_pos='pretrain')
+
     blockwise_opts = []
     modalities, modality_configs = get_modality(config)
+
     for modality, modality_config in zip(modalities, modality_configs):
         model.set_modality(modality)
-        eval_list = get_eval_list(model, config)
-        eval_model(model, None, eval_list, eval_pos='pretrain')
         if not config.get('calib', False):
             blockwise_opt = ALGO_REGISTRY[modality_config.method](
                 model,
 
@@ -249,8 +249,8 @@ def set_quant_config(self):
                 self.config['model']['type'] in ['Opt', 'Llama']
             ), 'Please set online_rotate=False'
             self.fp32_had = special_config.get('fp32_had', False)
-        self.hidden_size = self.model.model_config.hidden_size
-        self.set_model_config()
+        if self.quant_config.modality != 'video_gen':
+            self.set_model_config()
         self.modality = self.quant_config.modality
         logger.info(f'self.quant_objects : {self.quant_config.modality}')
 
@@ -373,12 +373,12 @@ def block_forward(self, block, input_data=None):
                 if torch.is_tensor(self.input['kwargs'][i][k]):
                     self.input['kwargs'][i][k] = self.input['kwargs'][i][k].to(
                         device=next(block.parameters()).device
-                    )  # noqa
+                    )
                 if isinstance(self.input['kwargs'][i][k], tuple):
                     self.input['kwargs'][i][k] = tuple(
                         tmp.to(device=next(block.parameters()).device)
                         for tmp in self.input['kwargs'][i][k]
-                    )  # noqa
+                    )
             with torch.no_grad():
                 out = block(input_data[i], **self.input['kwargs'][i])
                 if isinstance(out, tuple):
@@ -474,9 +474,10 @@ def block_transform(self, block, input_feat, block_kwargs):
             inspect_has_kwargs = subset['has_kwargs']
             if inspect_has_kwargs:
                 if 'sub_keys' in subset:
-                    subset_kwargs = [
-                        {k: block_kwargs[0][v] for k, v in subset['sub_keys'].items()}
-                    ]
+                    subset_kwargs = []
+                    for i in range(len(block_kwargs)):
+                        for k, v in subset['sub_keys'].items():
+                            subset_kwargs.append({k: block_kwargs[i][v]})
                 else:
                     subset_kwargs = block_kwargs
             else:
@@ -746,7 +747,10 @@ def shift_ln_fcs(self, ln, fcs, shifts):
     def scale_ln_fcs(self, ln, fcs, scales):
         if not isinstance(fcs, list):
             fcs = [fcs]
+
         scales = scales.to(ln.weight.device)
+        scales = scales.to(ln.weight.dtype)
+
         ln.weight.div_(scales)
 
         if hasattr(ln, 'bias') and ln.bias is not None:
@@ -954,6 +958,13 @@ def deploy(self, quant_format, keep_device=False):
                 self.get_replacement_params(mode=quant_format, w_only=self.w_only),
                 keep_device=keep_device,
             )
+        if self.modality == 'video_gen':
+            self.model.replace_video_gen_module_all(
+                module,
+                self.get_replacement_params(mode=quant_format, w_only=self.w_only),
+                keep_device=keep_device,
+            )
+
         self.set_non_linear_mode(quant_format, self.model.model, False)
 
         if self.quant_kvcache:
@@ -973,8 +984,11 @@ def deploy(self, quant_format, keep_device=False):
 
     @torch.no_grad()
     def copy_tokenizer(self, path):
-        self.model.tokenizer.save_pretrained(path)
-        logger.info('copy tokenizer done --')
+        """Save the model's tokenizer under ``path``, if the model has one.
+
+        When ``self.model.tokenizer`` is ``None`` nothing is written and
+        only an informational message is logged.
+        """
+        tokenizer = self.model.tokenizer
+        if tokenizer is None:
+            logger.info('no tokenizer, skip --')
+            return
+        tokenizer.save_pretrained(path)
+        logger.info('copy tokenizer done --')
 
     @torch.no_grad()
     def contiguous_params(self):
 
@@ -43,8 +43,8 @@ def set_quant_config(self):
             self.quant_out = True
         else:
             self.quant_out = False
-        self.quant_type = self.quant_config.get('quant_type', 'int_quant')
-        assert self.quant_type != 'float_quant', 'DGQ do not support Float quant now.'
+        self.quant_type = self.quant_config.get('quant_type', 'int-quant')
+        assert self.quant_type != 'float-quant', 'DGQ do not support Float quant now.'
         # set weight quant config
         self.wquantizer_w4 = IntegerQuantizer(**self.quant_config['weight']['w_1'])
         perchannel_setting = {
 
@@ -40,6 +40,88 @@ def block_wise_fp8_forward_func(x, w, w_scale, block_size, bias):
     return y
 
 
+class FakeAffineLayerNorm(nn.Module):
+    """Wrap an existing norm and expose ``weight``/``bias`` parameters beside it.
+
+    ``forward`` applies only the wrapped ``norm``; the two parameters created
+    here are not used inside this class and are left for callers to read or
+    modify.
+    """
+
+    def __init__(self, norm, shape):
+        """Store ``norm`` and create float32 all-ones ``weight`` and ``bias``.
+
+        Args:
+            norm: callable module applied in ``forward``.
+            shape: shape passed to ``torch.ones`` for both parameters.
+        """
+        super().__init__()
+        # Attribute assignment of an nn.Parameter registers it on the module.
+        self.weight = nn.Parameter(torch.ones(shape, dtype=torch.float))
+        self.bias = nn.Parameter(torch.ones(shape, dtype=torch.float))
+        self.norm = norm
+
+    def forward(self, x):
+        """Return ``self.norm(x)``; ``weight`` and ``bias`` are not applied here."""
+        return self.norm(x)
+
+    def extra_repr(self):
+        """Describe the emulated affine parameters in the module's ``repr``."""
+        return f'affine=True (emulated), shape={self.weight.shape}'
+
+
+class LlmcWanTransformerBlock(nn.Module):
+    """Wan transformer block rebuilt with emulated affine norms.
+
+    ``norm1`` and ``norm3`` of the source block are wrapped in
+    ``FakeAffineLayerNorm`` so each carries ``weight``/``bias`` parameters.
+    ``forward`` multiplies those parameters into the scale/shift modulation
+    derived from ``scale_shift_table + temb``. The remaining submodules are
+    reused from ``module`` by reference.
+    """
+
+    def __init__(self, module):
+        """Build the block from ``module``.
+
+        Args:
+            module: source block; must expose ``norm1``, ``norm2``,
+                ``norm3``, ``attn1``, ``attn2``, ``ffn`` and
+                ``scale_shift_table``.
+        """
+        super().__init__()
+
+        # The emulated affine parameters take the table's last dimension.
+        affine_shape = module.scale_shift_table.shape[-1]
+
+        self.norm1 = FakeAffineLayerNorm(module.norm1, affine_shape)
+        self.attn1 = module.attn1
+
+        self.attn2 = module.attn2
+        self.norm2 = module.norm2
+
+        # Wrap the feed-forward norm itself, not a second reference to norm1.
+        self.norm3 = FakeAffineLayerNorm(module.norm3, affine_shape)
+        self.ffn = module.ffn
+        self.scale_shift_table = module.scale_shift_table
+
+    def forward(
+        self,
+        hidden_states,
+        encoder_hidden_states,
+        temb,
+        rotary_emb,
+    ):
+        """Run self-attention, cross-attention and feed-forward in sequence.
+
+        Args:
+            hidden_states: block input; also fixes the dtype of the output.
+            encoder_hidden_states: forwarded to ``attn2`` only.
+            temb: added to ``scale_shift_table`` and split into six chunks
+                along dim 1 (presumably the timestep modulation - confirm
+                against the caller).
+            rotary_emb: forwarded to ``attn1`` only.
+
+        Returns:
+            The updated hidden states, cast back to the input's dtype.
+        """
+        shift_msa, scale_msa, gate_msa, c_shift_msa, c_scale_msa, c_gate_msa = (
+            self.scale_shift_table + temb
+        ).chunk(6, dim=1)
+
+        # 1. Self-attention
+        # The emulated affine parameters are folded into the modulation; the
+        # bias acts as a multiplier on the shift term.
+        norm1_weight = (1 + scale_msa) * self.norm1.weight
+        norm1_bias = shift_msa * self.norm1.bias
+
+        norm_hidden_states = (
+            self.norm1(hidden_states.float()) * norm1_weight + norm1_bias
+        ).type_as(hidden_states)
+        attn_output = self.attn1(
+            hidden_states=norm_hidden_states, rotary_emb=rotary_emb
+        )
+        hidden_states = (hidden_states.float() + attn_output * gate_msa).type_as(
+            hidden_states
+        )
+
+        # 2. Cross-attention
+        norm_hidden_states = self.norm2(hidden_states.float()).type_as(hidden_states)
+        attn_output = self.attn2(
+            hidden_states=norm_hidden_states,
+            encoder_hidden_states=encoder_hidden_states,
+        )
+        hidden_states = hidden_states + attn_output
+
+        # 3. Feed-forward
+        norm3_weight = (1 + c_scale_msa) * self.norm3.weight
+        norm3_bias = c_shift_msa * self.norm3.bias
+
+        norm_hidden_states = (
+            self.norm3(hidden_states.float()) * norm3_weight + norm3_bias
+        ).type_as(hidden_states)
+        ff_output = self.ffn(norm_hidden_states)
+        hidden_states = (
+            hidden_states.float() + ff_output.float() * c_gate_msa
+        ).type_as(hidden_states)
+
+        return hidden_states
+
+    @classmethod
+    @torch.no_grad()
+    def new(cls, module):
+        """Alternate constructor: return ``cls(module)``."""
+        return cls(module)
+
+
 class LlmcFp8Linear(nn.Module):
     def __init__(self, in_features, out_features, bias, block_size):
         super().__init__()
 
@@ -50,8 +50,8 @@ def add_quant_config(self):
         scale_config = special_config['scale']
         zero_config = special_config['zero']
 
-        self.quant_type = self.quant_config.get('quant_type', 'int_quant')
-        assert self.quant_type != 'float_quant', 'SPQR do not support Float quant now.'
+        self.quant_type = self.quant_config.get('quant_type', 'int-quant')
+        assert self.quant_type != 'float-quant', 'SPQR do not support Float quant now.'
         self.scale_quantizer = IntegerQuantizer(**scale_config)
         self.zero_quantizer = IntegerQuantizer(**zero_config)
         self.Q = IntegerQuantizer(
-Original file line number
+Diff line change
@@ @@ -0,0 +1,6 @@ @@
 +[
 +    {
 +        "prompt": "A cat walks on the grass, realistic",
 +        "negative_prompt": "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"
 +    }
 +]