support neo++ fp8 (#993)

helloyongyang · web-flow · commit 73147d4a5e2c · 2026-04-09T15:04:33.000+08:00
diff --git a/configs/neopp/neopp_dense_fp8.json b/configs/neopp/neopp_dense_fp8.json
@@ -0,0 +1,12 @@
+{
+    "version": "dense",
+    "load_kv_cache_in_pipeline_for_debug": true,
+    "infer_steps": 50,
+    "attn_type": "flash_attn3",
+    "cfg_scale": 4.0,
+    "timestep_shift": 3.0,
+    "cfg_interval": [-1, 2],
+    "enable_cfg": true,
+    "dit_quantized": true,
+    "dit_quant_scheme": "fp8-sgl"
+}
diff --git a/examples/neopp/neopp_dense_1k.py b/examples/neopp/neopp_dense_1k.py
@@ -5,7 +5,7 @@
 # -------------------------------------------------
 
 pipe = LightX2VPipeline(
-    model_path="/data/nvme1/yongyang/FL/neo_9b_new/hf_step4000_ema",
+    model_path="/data/nvme1/yongyang/FL/neo_9b_new/hf_step44000_ema_x2v_part",
     model_cls="neopp",
     support_tasks=["t2i", "i2i"],
 )
@@ -22,15 +22,15 @@
 # TURN 0
 # -------------------------------------------------
 pipe.runner.load_kvcache(
-    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor/to_x2v_cond_kv_0_289.pt",
-    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor/to_x2v_uncond_kv_0_9.pt",
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_1k/to_x2v_cond_kv_0_298.pt",
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_1k/to_x2v_uncond_kv_0_9.pt",
 )
 pipe.runner.set_inference_params(
-    index_offset_cond=289,
+    index_offset_cond=298,
     index_offset_uncond=9,
     cfg_interval=(-1, 2),
     cfg_scale=4.0,
-    cfg_norm="global",
+    cfg_norm="none",
     timestep_shift=3.0,
 )
 
@@ -45,20 +45,43 @@
 # TURN 1
 # -------------------------------------------------
 pipe.runner.load_kvcache(
-    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor/to_x2v_cond_kv_1_346.pt",
-    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor/to_x2v_uncond_kv_1_12.pt",
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_1k/to_x2v_cond_kv_1_366.pt",
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_1k/to_x2v_uncond_kv_1_12.pt",
 )
 pipe.runner.set_inference_params(
-    index_offset_cond=346,
+    index_offset_cond=366,
     index_offset_uncond=12,
     cfg_interval=(-1, 2),
     cfg_scale=4.0,
-    cfg_norm="global",
+    cfg_norm="none",
     timestep_shift=3.0,
 )
 
 pipe.generate(
-    seed=200,
+    seed=201,
     save_result_path="/data/nvme1/yongyang/FL/LightX2V/save_results/output_lightx2v_neopp_dense_1k_1.png",
     target_shape=[1024, 1024],  # Height, Width
 )
+
+
+# -------------------------------------------------
+# TURN 2
+# -------------------------------------------------
+pipe.runner.load_kvcache(
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_1k/to_x2v_cond_kv_2_441.pt",
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_1k/to_x2v_uncond_kv_2_15.pt",
+)
+pipe.runner.set_inference_params(
+    index_offset_cond=441,
+    index_offset_uncond=15,
+    cfg_interval=(-1, 2),
+    cfg_scale=4.0,
+    cfg_norm="none",
+    timestep_shift=3.0,
+)
+
+pipe.generate(
+    seed=202,
+    save_result_path="/data/nvme1/yongyang/FL/LightX2V/save_results/output_lightx2v_neopp_dense_1k_2.png",
+    target_shape=[1024, 1024],  # Height, Width
+)
diff --git a/examples/neopp/neopp_dense_1k_fp8.py b/examples/neopp/neopp_dense_1k_fp8.py
@@ -0,0 +1,87 @@
+from lightx2v import LightX2VPipeline
+
+# -------------------------------------------------
+# Initialize pipeline for NeoPP
+# -------------------------------------------------
+
+pipe = LightX2VPipeline(
+    model_path="/data/nvme1/yongyang/FL/neo_9b_new/hf_step44000_ema_x2v_part_fp8",
+    model_cls="neopp",
+    support_tasks=["t2i", "i2i"],
+)
+
+pipe.create_generator(config_json="../../configs/neopp/neopp_dense_fp8.json")
+pipe.modify_config({"load_kv_cache_in_pipeline_for_debug": False, "save_result_for_debug": True})
+
+
+# -------------------------------------------------
+# Load KV cache and generate
+# -------------------------------------------------
+
+# -------------------------------------------------
+# TURN 0
+# -------------------------------------------------
+pipe.runner.load_kvcache(
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_1k/to_x2v_cond_kv_0_298.pt",
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_1k/to_x2v_uncond_kv_0_9.pt",
+)
+pipe.runner.set_inference_params(
+    index_offset_cond=298,
+    index_offset_uncond=9,
+    cfg_interval=(-1, 2),
+    cfg_scale=4.0,
+    cfg_norm="none",
+    timestep_shift=3.0,
+)
+
+pipe.generate(
+    seed=200,
+    save_result_path="/data/nvme1/yongyang/FL/LightX2V/save_results/output_lightx2v_neopp_dense_1k_fp8_0.png",
+    target_shape=[1024, 1024],  # Height, Width
+)
+
+
+# -------------------------------------------------
+# TURN 1
+# -------------------------------------------------
+pipe.runner.load_kvcache(
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_1k/to_x2v_cond_kv_1_366.pt",
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_1k/to_x2v_uncond_kv_1_12.pt",
+)
+pipe.runner.set_inference_params(
+    index_offset_cond=366,
+    index_offset_uncond=12,
+    cfg_interval=(-1, 2),
+    cfg_scale=4.0,
+    cfg_norm="none",
+    timestep_shift=3.0,
+)
+
+pipe.generate(
+    seed=201,
+    save_result_path="/data/nvme1/yongyang/FL/LightX2V/save_results/output_lightx2v_neopp_dense_1k_fp8_1.png",
+    target_shape=[1024, 1024],  # Height, Width
+)
+
+
+# -------------------------------------------------
+# TURN 2
+# -------------------------------------------------
+pipe.runner.load_kvcache(
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_1k/to_x2v_cond_kv_2_441.pt",
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_1k/to_x2v_uncond_kv_2_15.pt",
+)
+pipe.runner.set_inference_params(
+    index_offset_cond=441,
+    index_offset_uncond=15,
+    cfg_interval=(-1, 2),
+    cfg_scale=4.0,
+    cfg_norm="none",
+    timestep_shift=3.0,
+)
+
+pipe.generate(
+    seed=202,
+    save_result_path="/data/nvme1/yongyang/FL/LightX2V/save_results/output_lightx2v_neopp_dense_1k_fp8_2.png",
+    target_shape=[1024, 1024],  # Height, Width
+)
diff --git a/examples/neopp/neopp_dense_2k.py b/examples/neopp/neopp_dense_2k.py
@@ -0,0 +1,87 @@
+from lightx2v import LightX2VPipeline
+
+# -------------------------------------------------
+# Initialize pipeline for NeoPP
+# -------------------------------------------------
+
+pipe = LightX2VPipeline(
+    model_path="/data/nvme1/yongyang/FL/neo_9b_new/hf_step44000_ema_x2v_part",
+    model_cls="neopp",
+    support_tasks=["t2i", "i2i"],
+)
+
+pipe.create_generator(config_json="../../configs/neopp/neopp_dense.json")
+pipe.modify_config({"load_kv_cache_in_pipeline_for_debug": False, "save_result_for_debug": True})
+
+
+# -------------------------------------------------
+# Load KV cache and generate
+# -------------------------------------------------
+
+# -------------------------------------------------
+# TURN 0
+# -------------------------------------------------
+pipe.runner.load_kvcache(
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_2k/to_x2v_cond_kv_0_298.pt",
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_2k/to_x2v_uncond_kv_0_9.pt",
+)
+pipe.runner.set_inference_params(
+    index_offset_cond=298,
+    index_offset_uncond=9,
+    cfg_interval=(-1, 2),
+    cfg_scale=4.0,
+    cfg_norm="none",
+    timestep_shift=3.0,
+)
+
+pipe.generate(
+    seed=200,
+    save_result_path="/data/nvme1/yongyang/FL/LightX2V/save_results/output_lightx2v_neopp_dense_2k_0.png",
+    target_shape=[2048, 2048],  # Height, Width
+)
+
+
+# -------------------------------------------------
+# TURN 1
+# -------------------------------------------------
+pipe.runner.load_kvcache(
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_2k/to_x2v_cond_kv_1_360.pt",
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_2k/to_x2v_uncond_kv_1_12.pt",
+)
+pipe.runner.set_inference_params(
+    index_offset_cond=360,  # NOTE(review): was 366 (the 1k value); aligned with the 2k cond cache name *_cond_kv_1_360.pt - confirm
+    index_offset_uncond=12,
+    cfg_interval=(-1, 2),
+    cfg_scale=4.0,
+    cfg_norm="none",
+    timestep_shift=3.0,
+)
+
+pipe.generate(
+    seed=201,
+    save_result_path="/data/nvme1/yongyang/FL/LightX2V/save_results/output_lightx2v_neopp_dense_2k_1.png",
+    target_shape=[2048, 2048],  # Height, Width
+)
+
+
+# -------------------------------------------------
+# TURN 2
+# -------------------------------------------------
+pipe.runner.load_kvcache(
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_2k/to_x2v_cond_kv_2_439.pt",
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_2k/to_x2v_uncond_kv_2_15.pt",
+)
+pipe.runner.set_inference_params(
+    index_offset_cond=439,  # NOTE(review): was 441 (the 1k value); aligned with the 2k cond cache name *_cond_kv_2_439.pt - confirm
+    index_offset_uncond=15,
+    cfg_interval=(-1, 2),
+    cfg_scale=4.0,
+    cfg_norm="none",
+    timestep_shift=3.0,
+)
+
+pipe.generate(
+    seed=202,
+    save_result_path="/data/nvme1/yongyang/FL/LightX2V/save_results/output_lightx2v_neopp_dense_2k_2.png",
+    target_shape=[2048, 2048],  # Height, Width
+)
diff --git a/examples/neopp/neopp_dense_2k_fp8.py b/examples/neopp/neopp_dense_2k_fp8.py
@@ -0,0 +1,87 @@
+from lightx2v import LightX2VPipeline
+
+# -------------------------------------------------
+# Initialize pipeline for NeoPP
+# -------------------------------------------------
+
+pipe = LightX2VPipeline(
+    model_path="/data/nvme1/yongyang/FL/neo_9b_new/hf_step44000_ema_x2v_part_fp8",
+    model_cls="neopp",
+    support_tasks=["t2i", "i2i"],
+)
+
+pipe.create_generator(config_json="../../configs/neopp/neopp_dense_fp8.json")
+pipe.modify_config({"load_kv_cache_in_pipeline_for_debug": False, "save_result_for_debug": True})
+
+
+# -------------------------------------------------
+# Load KV cache and generate
+# -------------------------------------------------
+
+# -------------------------------------------------
+# TURN 0
+# -------------------------------------------------
+pipe.runner.load_kvcache(
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_2k/to_x2v_cond_kv_0_298.pt",
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_2k/to_x2v_uncond_kv_0_9.pt",
+)
+pipe.runner.set_inference_params(
+    index_offset_cond=298,
+    index_offset_uncond=9,
+    cfg_interval=(-1, 2),
+    cfg_scale=4.0,
+    cfg_norm="none",
+    timestep_shift=3.0,
+)
+
+pipe.generate(
+    seed=200,
+    save_result_path="/data/nvme1/yongyang/FL/LightX2V/save_results/output_lightx2v_neopp_dense_2k_fp8_0.png",
+    target_shape=[2048, 2048],  # Height, Width
+)
+
+
+# -------------------------------------------------
+# TURN 1
+# -------------------------------------------------
+pipe.runner.load_kvcache(
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_2k/to_x2v_cond_kv_1_360.pt",
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_2k/to_x2v_uncond_kv_1_12.pt",
+)
+pipe.runner.set_inference_params(
+    index_offset_cond=360,  # NOTE(review): was 366 (the 1k value); aligned with the 2k cond cache name *_cond_kv_1_360.pt - confirm
+    index_offset_uncond=12,
+    cfg_interval=(-1, 2),
+    cfg_scale=4.0,
+    cfg_norm="none",
+    timestep_shift=3.0,
+)
+
+pipe.generate(
+    seed=201,
+    save_result_path="/data/nvme1/yongyang/FL/LightX2V/save_results/output_lightx2v_neopp_dense_2k_fp8_1.png",
+    target_shape=[2048, 2048],  # Height, Width
+)
+
+
+# -------------------------------------------------
+# TURN 2
+# -------------------------------------------------
+pipe.runner.load_kvcache(
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_2k/to_x2v_cond_kv_2_439.pt",
+    "/data/nvme1/yongyang/FL/neo_9b_new/vlm_tensor_44000_ema_2k/to_x2v_uncond_kv_2_15.pt",
+)
+pipe.runner.set_inference_params(
+    index_offset_cond=439,  # NOTE(review): was 441 (the 1k value); aligned with the 2k cond cache name *_cond_kv_2_439.pt - confirm
+    index_offset_uncond=15,
+    cfg_interval=(-1, 2),
+    cfg_scale=4.0,
+    cfg_norm="none",
+    timestep_shift=3.0,
+)
+
+pipe.generate(
+    seed=202,
+    save_result_path="/data/nvme1/yongyang/FL/LightX2V/save_results/output_lightx2v_neopp_dense_2k_fp8_2.png",
+    target_shape=[2048, 2048],  # Height, Width
+)
diff --git a/lightx2v/models/networks/neopp/weights/transformer_weights.py b/lightx2v/models/networks/neopp/weights/transformer_weights.py