From 1613e45d4caba9e95f7a0bd63fba6d32b2b179e1 Mon Sep 17 00:00:00 2001 From: zowiezhang Date: Wed, 8 Apr 2026 23:27:20 +0800 Subject: [PATCH 1/4] Complete LightX2V support for Motus with the i2v task. --- configs/motus/motus_i2v.json | 30 + lightx2v/infer.py | 4 + lightx2v/models/networks/motus/__init__.py | 21 + .../models/networks/motus/action_expert.py | 144 +++ lightx2v/models/networks/motus/core.py | 343 ++++++ lightx2v/models/networks/motus/image_utils.py | 19 + .../models/networks/motus/infer/__init__.py | 5 + .../models/networks/motus/infer/module_io.py | 24 + .../models/networks/motus/infer/post_infer.py | 20 + .../models/networks/motus/infer/pre_infer.py | 49 + .../networks/motus/infer/transformer_infer.py | 125 ++ lightx2v/models/networks/motus/model.py | 311 +++++ lightx2v/models/networks/motus/ops.py | 160 +++ lightx2v/models/networks/motus/primitives.py | 68 ++ lightx2v/models/networks/motus/t5.py | 3 + lightx2v/models/networks/motus/und_expert.py | 56 + .../models/networks/motus/wan/__init__.py | 13 + .../models/networks/motus/wan/attention.py | 98 ++ lightx2v/models/networks/motus/wan/model.py | 680 +++++++++++ lightx2v/models/networks/motus/wan/t5.py | 513 ++++++++ .../models/networks/motus/wan/tokenizers.py | 62 + lightx2v/models/networks/motus/wan/vae2_2.py | 1041 +++++++++++++++++ lightx2v/models/networks/motus/wan_model.py | 79 ++ lightx2v/models/runners/motus/__init__.py | 1 + lightx2v/models/runners/motus/motus_runner.py | 95 ++ lightx2v/models/schedulers/motus/__init__.py | 3 + lightx2v/models/schedulers/motus/scheduler.py | 39 + lightx2v/utils/input_info.py | 3 + scripts/motus/example_inputs/first_frame.png | Bin 0 -> 84851 bytes scripts/motus/example_inputs/state.npy | Bin 0 -> 184 bytes scripts/motus/run_motus_i2v.sh | 22 + 31 files changed, 4031 insertions(+) create mode 100644 configs/motus/motus_i2v.json create mode 100644 lightx2v/models/networks/motus/__init__.py create mode 100644 lightx2v/models/networks/motus/action_expert.py create mode 100644 lightx2v/models/networks/motus/core.py create mode 100644 lightx2v/models/networks/motus/image_utils.py create mode 100644 lightx2v/models/networks/motus/infer/__init__.py create mode 100644 lightx2v/models/networks/motus/infer/module_io.py create mode 100644 lightx2v/models/networks/motus/infer/post_infer.py create mode 100644 lightx2v/models/networks/motus/infer/pre_infer.py create mode 100644 lightx2v/models/networks/motus/infer/transformer_infer.py create mode 100644 lightx2v/models/networks/motus/model.py create mode 100644 lightx2v/models/networks/motus/ops.py create mode 100644 lightx2v/models/networks/motus/primitives.py create mode 100644 lightx2v/models/networks/motus/t5.py create mode 100644 lightx2v/models/networks/motus/und_expert.py create mode 100644 lightx2v/models/networks/motus/wan/__init__.py create mode 100644 lightx2v/models/networks/motus/wan/attention.py create mode 100644 lightx2v/models/networks/motus/wan/model.py create mode 100644 lightx2v/models/networks/motus/wan/t5.py create mode 100644 lightx2v/models/networks/motus/wan/tokenizers.py create mode 100644 lightx2v/models/networks/motus/wan/vae2_2.py create mode 100644 lightx2v/models/networks/motus/wan_model.py create mode 100644 lightx2v/models/runners/motus/__init__.py create mode 100644 lightx2v/models/runners/motus/motus_runner.py create mode 100644 lightx2v/models/schedulers/motus/__init__.py create mode 100644 lightx2v/models/schedulers/motus/scheduler.py create mode 100644 scripts/motus/example_inputs/first_frame.png create mode
100644 scripts/motus/example_inputs/state.npy create mode 100644 scripts/motus/run_motus_i2v.sh diff --git a/configs/motus/motus_i2v.json b/configs/motus/motus_i2v.json new file mode 100644 index 000000000..68cbb5d0b --- /dev/null +++ b/configs/motus/motus_i2v.json @@ -0,0 +1,30 @@ +{ + "checkpoint_path": "/path/to/MotusModel", + "wan_path": "/path/to/Wan2.2-TI2V-5B", + "vlm_path": "/path/to/Qwen3-VL-2B-Instruct", + "infer_steps": 10, + "num_inference_steps": 10, + "target_video_length": 9, + "target_height": 384, + "target_width": 320, + "attention_type": "flash_attn2", + "self_attn_1_type": "flash_attn2", + "self_attn_2_type": "flash_attn2", + "cross_attn_1_type": "flash_attn2", + "global_downsample_rate": 3, + "video_action_freq_ratio": 2, + "num_video_frames": 8, + "video_height": 384, + "video_width": 320, + "fps": 4, + "motus_quantized": false, + "motus_quant_scheme": "Default", + "load_pretrained_backbones": false, + "training_mode": "finetune", + "action_state_dim": 14, + "action_dim": 14, + "action_expert_dim": 1024, + "action_expert_ffn_dim_multiplier": 4, + "und_expert_hidden_size": 512, + "und_expert_ffn_dim_multiplier": 4 +} diff --git a/lightx2v/infer.py b/lightx2v/infer.py index 2ebd770b4..03239e6bb 100755 --- a/lightx2v/infer.py +++ b/lightx2v/infer.py @@ -7,6 +7,7 @@ from lightx2v.common.ops import * from lightx2v.models.runners.bagel.bagel_runner import BagelRunner # noqa: F401 +from lightx2v.models.runners.motus.motus_runner import MotusRunner # noqa: F401 try: from lightx2v.models.runners.flux2_klein.flux2_klein_runner import Flux2KleinRunner # noqa: F401 @@ -82,6 +83,7 @@ def main(): "bagel", "seedvr2", "neopp", + "motus", ], default="wan2.1", ) @@ -102,6 +104,7 @@ def main(): default="", help="The path to input image file(s) for image-to-video (i2v) or image-to-audio-video (i2av) task. Multiple paths should be comma-separated. Example: 'path1.jpg,path2.jpg'", ) + parser.add_argument("--state_path", type=str, default="", help="The path to input robot state file for Motus i2v inference.") parser.add_argument("--last_frame_path", type=str, default="", help="The path to last frame file for first-last-frame-to-video (flf2v) task") parser.add_argument("--audio_path", type=str, default="", help="The path to input audio file or directory for audio-to-video (s2v) task") parser.add_argument("--image_strength", type=float, default=1.0, help="The strength of the image-to-audio-video (i2av) task") @@ -167,6 +170,7 @@ def main(): help="Path to action model checkpoint for WorldPlay models.", ) parser.add_argument("--save_result_path", type=str, default=None, help="The path to save video path/file") + parser.add_argument("--save_action_path", type=str, default=None, help="The path to save action predictions for Motus.") parser.add_argument("--return_result_tensor", action="store_true", help="Whether to return result tensor. 
(Useful for comfyui)") parser.add_argument("--target_shape", type=int, nargs="+", default=[], help="Set return video or image shape") parser.add_argument("--target_video_length", type=int, default=81, help="The target video length for each generated clip") diff --git a/lightx2v/models/networks/motus/__init__.py b/lightx2v/models/networks/motus/__init__.py new file mode 100644 index 000000000..15f71005a --- /dev/null +++ b/lightx2v/models/networks/motus/__init__.py @@ -0,0 +1,21 @@ +from .action_expert import ActionExpert, ActionExpertConfig +from .core import Motus, MotusConfig +from .primitives import WanLayerNorm, WanRMSNorm, rope_apply, sinusoidal_embedding_1d +from .t5 import T5EncoderModel +from .und_expert import UndExpert, UndExpertConfig +from .wan_model import WanVideoModel + +__all__ = [ + "Motus", + "MotusConfig", + "WanVideoModel", + "ActionExpert", + "ActionExpertConfig", + "UndExpert", + "UndExpertConfig", + "T5EncoderModel", + "WanLayerNorm", + "WanRMSNorm", + "sinusoidal_embedding_1d", + "rope_apply", +] diff --git a/lightx2v/models/networks/motus/action_expert.py b/lightx2v/models/networks/motus/action_expert.py new file mode 100644 index 000000000..004177abb --- /dev/null +++ b/lightx2v/models/networks/motus/action_expert.py @@ -0,0 +1,144 @@ +import logging +import re +from dataclasses import dataclass + +import numpy as np +import torch +import torch.nn as nn + +from .primitives import WanLayerNorm, WanRMSNorm + +logger = logging.getLogger(__name__) + + +def get_1d_sincos_pos_embed_from_grid(embed_dim: int, pos): + assert embed_dim % 2 == 0 + omega = np.arange(embed_dim // 2, dtype=np.float64) + omega /= embed_dim / 2.0 + omega = 1.0 / 10000**omega + if isinstance(pos, torch.Tensor): + pos = pos.cpu().numpy() + pos = pos.reshape(-1) + out = np.einsum("m,d->md", pos, omega) + emb = np.concatenate([np.sin(out), np.cos(out)], axis=1) + return torch.from_numpy(emb).float() + + +@dataclass +class ActionExpertConfig: + dim: int = 1024 + ffn_dim: int = 4096 + num_layers: int = 30 + state_dim: int = 14 + action_dim: int = 14 + chunk_size: int = 16 + video_feature_dim: int = 3072 + causal: bool = False + num_registers: int = 4 + eps: float = 1e-6 + training_mode: str = "finetune" + + def __post_init__(self): + assert self.chunk_size >= 2 + + +def build_mlp(projector_type, in_features, out_features): + if projector_type == "linear": + return nn.Linear(in_features, out_features) + mlp_silu_match = re.match(r"^mlp(\d+)x_silu$", projector_type) + if mlp_silu_match: + mlp_depth = int(mlp_silu_match.group(1)) + modules = [nn.Linear(in_features, out_features)] + for _ in range(1, mlp_depth): + modules.append(nn.SiLU()) + modules.append(nn.Linear(out_features, out_features)) + return nn.Sequential(*modules) + raise ValueError(f"Unknown projector type: {projector_type}") + + +class StateActionEncoder(nn.Module): + def __init__(self, config: ActionExpertConfig): + super().__init__() + self.state_encoder = build_mlp("mlp3x_silu", config.state_dim, config.dim) + self.action_encoder = build_mlp("mlp3x_silu", config.action_dim, config.dim) + max_seq_len = config.chunk_size + 1 + config.num_registers + pos_embed = get_1d_sincos_pos_embed_from_grid(config.dim, np.arange(max_seq_len)) + self.register_buffer("pos_embedding", pos_embed.unsqueeze(0)) + + def forward(self, state_tokens: torch.Tensor, action_tokens: torch.Tensor, registers: torch.Tensor = None) -> torch.Tensor: + encoded = torch.cat([self.state_encoder(state_tokens), self.action_encoder(action_tokens)], dim=1) + if registers is not 
None: + encoded = torch.cat([encoded, registers], dim=1) + return encoded + self.pos_embedding[:, : encoded.shape[1], :] + + +class ActionEncoder(nn.Module): + def __init__(self, config: ActionExpertConfig): + super().__init__() + self.action_encoder = build_mlp("mlp3x_silu", config.action_dim, config.dim) + max_seq_len = config.chunk_size + config.num_registers + pos_embed = get_1d_sincos_pos_embed_from_grid(config.dim, np.arange(max_seq_len)) + self.register_buffer("pos_embedding", pos_embed.unsqueeze(0)) + + def forward(self, state_tokens: torch.Tensor, action_tokens: torch.Tensor, registers: torch.Tensor = None) -> torch.Tensor: + encoded = self.action_encoder(action_tokens) + if registers is not None: + encoded = torch.cat([encoded, registers], dim=1) + return encoded + self.pos_embedding[:, : encoded.shape[1], :] + + +class ActionExpertBlock(nn.Module): + def __init__(self, config: ActionExpertConfig, wan_config: dict): + super().__init__() + self.norm1 = WanLayerNorm(config.dim, eps=config.eps) + self.norm2 = WanLayerNorm(config.dim, eps=config.eps) + self.wan_num_heads = wan_config["num_heads"] + self.wan_head_dim = wan_config["head_dim"] + self.wan_dim = wan_config["dim"] + self.wan_action_qkv = nn.Parameter(torch.randn(3, self.wan_num_heads, config.dim, self.wan_head_dim) / (config.dim * self.wan_head_dim) ** 0.5) + self.wan_action_o = nn.Linear(self.wan_dim, config.dim, bias=False) + self.wan_action_norm_q = WanRMSNorm(self.wan_dim, eps=config.eps) + self.wan_action_norm_k = WanRMSNorm(self.wan_dim, eps=config.eps) + self.ffn = nn.Sequential(nn.Linear(config.dim, config.ffn_dim), nn.GELU(approximate="tanh"), nn.Linear(config.ffn_dim, config.dim)) + self.modulation = nn.Parameter(torch.randn(1, 6, config.dim) / config.dim**0.5) + + +class ActionDecoder(nn.Module): + def __init__(self, config: ActionExpertConfig): + super().__init__() + self.norm = WanLayerNorm(config.dim, eps=config.eps) + self.action_head = build_mlp("mlp1x_silu", config.dim, config.action_dim) + self.modulation = nn.Parameter(torch.randn(1, 2, config.dim) / config.dim**0.5) + + def forward(self, x: torch.Tensor, time_emb: torch.Tensor) -> torch.Tensor: + with torch.amp.autocast("cuda", dtype=torch.float32): + e0, e1 = (self.modulation.unsqueeze(0) + time_emb.unsqueeze(2)).chunk(2, dim=2) + z = self.norm(x) * (1 + e1.squeeze(2)) + e0.squeeze(2) + return self.action_head(z) + + +class ActionExpert(nn.Module): + def __init__(self, config: ActionExpertConfig, wan_config: dict = None): + super().__init__() + self.config = config + self.freq_dim = 256 + self.input_encoder = ActionEncoder(config) if config.training_mode == "pretrain" else StateActionEncoder(config) + self.time_embedding = nn.Sequential(nn.Linear(self.freq_dim, config.dim), nn.SiLU(), nn.Linear(config.dim, config.dim)) + self.time_projection = nn.Sequential(nn.SiLU(), nn.Linear(config.dim, config.dim * 6)) + block_cfg = wan_config or {"dim": 3072, "num_heads": 24, "head_dim": 128} + self.blocks = nn.ModuleList([ActionExpertBlock(config, block_cfg) for _ in range(config.num_layers)]) + self.registers = nn.Parameter(torch.empty(1, config.num_registers, config.dim).normal_(std=0.02)) if config.num_registers > 0 else None + self.decoder = ActionDecoder(config) + self.initialize_weights() + + def initialize_weights(self): + for module in self.modules(): + if isinstance(module, nn.Linear): + nn.init.xavier_uniform_(module.weight) + if module.bias is not None: + nn.init.zeros_(module.bias) + nn.init.zeros_(self.decoder.action_head[-1].weight) + 
nn.init.zeros_(self.decoder.action_head[-1].bias) + for module in self.time_embedding.modules(): + if isinstance(module, nn.Linear): + nn.init.normal_(module.weight, std=0.02) diff --git a/lightx2v/models/networks/motus/core.py b/lightx2v/models/networks/motus/core.py new file mode 100644 index 000000000..cd93ee98d --- /dev/null +++ b/lightx2v/models/networks/motus/core.py @@ -0,0 +1,343 @@ +import logging +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +import torch +import torch.nn as nn +from transformers import AutoConfig, Qwen3VLForConditionalGeneration + +from .action_expert import ActionExpert, ActionExpertConfig +from .primitives import sinusoidal_embedding_1d +from .und_expert import UndExpert, UndExpertConfig +from .wan_model import WanVideoModel + +logger = logging.getLogger(__name__) + + +@dataclass +class MotusConfig: + wan_checkpoint_path: str + vae_path: str + wan_config_path: str + video_precision: str = "bfloat16" + vlm_checkpoint_path: str = "" + und_expert_hidden_size: int = 512 + und_expert_ffn_dim_multiplier: int = 4 + und_expert_norm_eps: float = 1e-5 + und_layers_to_extract: List[int] = None + vlm_adapter_input_dim: int = 2048 + vlm_adapter_projector_type: str = "mlp3x_silu" + num_layers: int = 30 + action_state_dim: int = 14 + action_dim: int = 14 + action_expert_dim: int = 1024 + action_expert_ffn_dim_multiplier: int = 4 + action_expert_norm_eps: float = 1e-6 + global_downsample_rate: int = 3 + video_action_freq_ratio: int = 2 + num_video_frames: int = 8 + video_height: int = 384 + video_width: int = 320 + batch_size: int = 1 + training_mode: str = "finetune" + load_pretrained_backbones: Optional[bool] = None + + def __post_init__(self): + self.action_chunk_size = self.num_video_frames * self.video_action_freq_ratio + if self.und_layers_to_extract is None: + self.und_layers_to_extract = list(range(self.num_layers)) + + +class VideoModule(nn.Module): + def __init__(self, video_model, dtype, device, grid_sizes): + super().__init__() + self.video_model = video_model + self.dtype = dtype + self.device = device + self.grid_sizes = grid_sizes + + def prepare_input(self, noisy_video_latent: torch.Tensor) -> torch.Tensor: + return self.video_model.wan_model.patch_embedding(noisy_video_latent).flatten(2).transpose(1, 2) + + def preprocess_t5_embeddings(self, language_embeddings) -> torch.Tensor: + if isinstance(language_embeddings, list): + text_len = self.video_model.wan_model.text_len + padded = [] + for emb in language_embeddings: + padded.append(torch.cat([emb, emb.new_zeros(text_len - emb.shape[0], emb.shape[1])]) if emb.shape[0] <= text_len else emb[:text_len]) + t5_context_raw = torch.stack(padded, dim=0) + else: + t5_context_raw = language_embeddings + return self.video_model.wan_model.text_embedding(t5_context_raw) + + def get_time_embedding(self, t_video: torch.Tensor, seq_len: int) -> tuple[torch.Tensor, torch.Tensor]: + if t_video.dim() == 1: + t_video = t_video.unsqueeze(1).expand(t_video.size(0), seq_len) + with torch.amp.autocast("cuda", dtype=torch.float32): + bt = t_video.size(0) + t_flat = t_video.flatten() + t_emb = self.video_model.wan_model.time_embedding(sinusoidal_embedding_1d(self.video_model.wan_model.freq_dim, t_flat).unflatten(0, (bt, seq_len)).float()) + t_emb_proj = self.video_model.wan_model.time_projection(t_emb).unflatten(2, (6, self.video_model.wan_model.dim)) + return t_emb, t_emb_proj + + def compute_adaln_modulation(self, video_adaln_params: torch.Tensor, layer_idx: int) -> 
tuple: + wan_layer = self.video_model.wan_model.blocks[layer_idx] + with torch.amp.autocast("cuda", dtype=torch.float32): + return (wan_layer.modulation.unsqueeze(0) + video_adaln_params).chunk(6, dim=2) + + def process_ffn(self, video_tokens: torch.Tensor, video_adaln_modulation: tuple, layer_idx: int) -> torch.Tensor: + wan_layer = self.video_model.wan_model.blocks[layer_idx] + v_mod = video_adaln_modulation + ffn_input = wan_layer.norm2(video_tokens).float() * (1 + v_mod[4].squeeze(2)) + v_mod[3].squeeze(2) + ffn_out = wan_layer.ffn(ffn_input) + with torch.amp.autocast("cuda", dtype=torch.float32): + return video_tokens + ffn_out * v_mod[5].squeeze(2) + + def apply_output_head(self, video_tokens: torch.Tensor, video_time_emb: torch.Tensor) -> torch.Tensor: + x = self.video_model.wan_model.head(video_tokens, video_time_emb) + x = self.video_model.wan_model.unpatchify(x, self.grid_sizes) + return torch.stack([u.float() for u in x], dim=0) + + +class UndModule(nn.Module): + def __init__(self, vlm_model, und_expert, config, dtype, device, image_context_adapter=None): + super().__init__() + self.vlm_model = vlm_model + self.und_expert = und_expert + self.config = config + self.dtype = dtype + self.device = device + self.image_context_adapter = image_context_adapter + + def _parse_vision_outputs(self, vision_outputs): + if hasattr(vision_outputs, "pooler_output"): + image_embeds = vision_outputs.pooler_output + deepstack_image_embeds = vision_outputs.get("hidden_states", None) if hasattr(vision_outputs, "get") else getattr(vision_outputs, "hidden_states", None) + elif isinstance(vision_outputs, tuple): + image_embeds = vision_outputs[0] + deepstack_image_embeds = vision_outputs[1] if len(vision_outputs) > 1 else None + else: + image_embeds = vision_outputs + deepstack_image_embeds = None + + if torch.is_tensor(image_embeds): + return image_embeds.to(self.device, self.dtype), deepstack_image_embeds + if isinstance(image_embeds, (list, tuple)): + return torch.cat(list(image_embeds), dim=0).to(self.device, self.dtype), deepstack_image_embeds + raise TypeError(f"Unsupported image feature output type: {type(image_embeds)}") + + def _process_vlm_inputs_to_tokens(self, vlm_inputs, batch: int): + if isinstance(vlm_inputs, list): + input_ids_batch = torch.cat([item["input_ids"] for item in vlm_inputs], dim=0).to(self.device) + attention_mask_batch = torch.cat([item["attention_mask"] for item in vlm_inputs], dim=0).to(self.device) + pixel_values_batch = torch.cat([item["pixel_values"] for item in vlm_inputs], dim=0).to(self.device) + image_grid_thw_batch = torch.cat([item["image_grid_thw"] for item in vlm_inputs], dim=0).to(self.device) + else: + input_ids_batch = vlm_inputs["input_ids"].to(self.device) + attention_mask_batch = vlm_inputs["attention_mask"].to(self.device) + pixel_values_batch = vlm_inputs["pixel_values"].to(self.device) + image_grid_thw_batch = vlm_inputs["image_grid_thw"].to(self.device) + + inputs_embeds = self.vlm_model.get_input_embeddings()(input_ids_batch) + vision_outputs = self.vlm_model.get_image_features(pixel_values_batch, image_grid_thw_batch) + image_embeds, deepstack_image_embeds = self._parse_vision_outputs(vision_outputs) + image_mask, _ = self.vlm_model.model.get_placeholder_mask(input_ids_batch, inputs_embeds=inputs_embeds, image_features=image_embeds) + inputs_embeds = inputs_embeds.masked_scatter(image_mask, image_embeds) + visual_pos_masks = image_mask[..., 0] + position_ids, _ = self.vlm_model.model.get_rope_index( + input_ids=input_ids_batch, + 
image_grid_thw=image_grid_thw_batch, + video_grid_thw=None, + attention_mask=attention_mask_batch, + ) + return inputs_embeds, attention_mask_batch, visual_pos_masks, deepstack_image_embeds, position_ids + + def extract_und_features(self, vlm_inputs) -> torch.Tensor: + batch = len(vlm_inputs) if isinstance(vlm_inputs, list) else vlm_inputs["input_ids"].shape[0] + inputs_embeds, attention_mask, visual_pos_masks, deepstack_image_embeds, position_ids = self._process_vlm_inputs_to_tokens(vlm_inputs, batch) + kwargs = { + "inputs_embeds": inputs_embeds, + "attention_mask": attention_mask, + "position_ids": position_ids, + "past_key_values": None, + "use_cache": False, + "output_attentions": False, + "output_hidden_states": True, + "return_dict": True, + } + if visual_pos_masks is not None: + kwargs["visual_pos_masks"] = visual_pos_masks + if deepstack_image_embeds is not None: + kwargs["deepstack_visual_embeds"] = deepstack_image_embeds + with torch.no_grad(): + vlm_output = self.vlm_model.model.language_model(**kwargs) + return self.und_expert.vlm_adapter(vlm_output.hidden_states[-1]) + + def extract_image_context(self, vlm_inputs) -> torch.Tensor | None: + if self.image_context_adapter is None: + return None + + if isinstance(vlm_inputs, list): + pixel_values = torch.cat([item["pixel_values"] for item in vlm_inputs], dim=0).to(self.device) + image_grid_thw = torch.cat([item["image_grid_thw"] for item in vlm_inputs], dim=0).to(self.device) + else: + pixel_values = vlm_inputs["pixel_values"].to(self.device) + image_grid_thw = vlm_inputs["image_grid_thw"].to(self.device) + + with torch.no_grad(): + vision_outputs = self.vlm_model.get_image_features(pixel_values, image_grid_thw) + image_embeds, _ = self._parse_vision_outputs(vision_outputs) + return self.image_context_adapter(image_embeds) + + def process_ffn(self, und_tokens: torch.Tensor, layer_idx: int) -> torch.Tensor: + block = self.und_expert.blocks[layer_idx] + return und_tokens + block.ffn(block.norm2(und_tokens)) + + +class ActionModule(nn.Module): + def __init__(self, action_expert: ActionExpert, config, video_model, vlm_model, dtype, device): + super().__init__() + self.action_expert = action_expert + self.config = config + self.video_model = video_model + self.vlm_model = vlm_model + self.dtype = dtype + self.device = device + + def get_time_embedding(self, t: torch.Tensor, seq_len: int) -> tuple[torch.Tensor, torch.Tensor]: + if t.dim() == 1: + t = t.unsqueeze(1).expand(t.size(0), seq_len) + with torch.amp.autocast("cuda", dtype=torch.float32): + bt = t.size(0) + t_flat = t.flatten() + a_e = self.action_expert.time_embedding(sinusoidal_embedding_1d(self.action_expert.freq_dim, t_flat).unflatten(0, (bt, seq_len)).float()) + a_e0 = self.action_expert.time_projection(a_e).unflatten(2, (6, self.config.action_expert_dim)) + return a_e, a_e0 + + def compute_adaln_modulation(self, action_adaln_params: torch.Tensor, layer_idx: int) -> tuple: + action_layer = self.action_expert.blocks[layer_idx] + with torch.amp.autocast("cuda", dtype=torch.float32): + return (action_layer.modulation.unsqueeze(0) + action_adaln_params).chunk(6, dim=2) + + def process_ffn(self, action_tokens: torch.Tensor, action_adaln_modulation: tuple, layer_idx: int) -> torch.Tensor: + action_block = self.action_expert.blocks[layer_idx] + a_mod = action_adaln_modulation + ffn_input = action_block.norm2(action_tokens).float() * (1 + a_mod[4].squeeze(2)) + a_mod[3].squeeze(2) + ffn_out = action_block.ffn(ffn_input) + with torch.amp.autocast("cuda", dtype=torch.float32): + 
return action_tokens + ffn_out * a_mod[5].squeeze(2) + + +class Motus(nn.Module): + def __init__(self, config: MotusConfig): + super().__init__() + self.config = config + self.dtype = torch.bfloat16 + load_backbones = True if config.load_pretrained_backbones is None else bool(config.load_pretrained_backbones) + + if load_backbones: + self.video_model = WanVideoModel.from_pretrained( + checkpoint_path=config.wan_checkpoint_path, + vae_path=config.vae_path, + config_path=config.wan_config_path, + precision=config.video_precision, + ) + else: + self.video_model = WanVideoModel.from_config( + config_path=config.wan_config_path, + vae_path=config.vae_path, + device="cuda", + precision=config.video_precision, + ) + + if load_backbones: + self.vlm_model = Qwen3VLForConditionalGeneration.from_pretrained( + config.vlm_checkpoint_path, + dtype=self.dtype, + device_map="cuda", + trust_remote_code=True, + ) + else: + vlm_cfg = AutoConfig.from_pretrained(config.vlm_checkpoint_path, trust_remote_code=True) + self.vlm_model = Qwen3VLForConditionalGeneration._from_config(vlm_cfg, torch_dtype=self.dtype) + self.vlm_model.to(device="cuda", dtype=self.dtype) + + for param in self.vlm_model.parameters(): + param.requires_grad = False + + wan_dim = getattr(self.video_model.wan_model.config, "dim", 3072) + wan_num_heads = getattr(self.video_model.wan_model.config, "num_heads", 24) + wan_head_dim = wan_dim // wan_num_heads + vlm_dim = self.vlm_model.config.text_config.hidden_size + vlm_num_heads = self.vlm_model.config.text_config.num_attention_heads + vlm_num_kv_heads = getattr(self.vlm_model.config.text_config, "num_key_value_heads", vlm_num_heads) + vlm_num_hidden_layers = self.vlm_model.config.text_config.num_hidden_layers + + wan_config = {"dim": wan_dim, "num_heads": wan_num_heads, "head_dim": wan_head_dim} + vlm_config = { + "hidden_size": vlm_dim, + "num_attention_heads": vlm_num_heads, + "num_key_value_heads": vlm_num_kv_heads, + "head_dim": vlm_dim // vlm_num_heads, + "num_hidden_layers": vlm_num_hidden_layers, + } + + action_chunk_size_for_expert = config.action_chunk_size if config.training_mode == "pretrain" else config.action_chunk_size + 1 + num_registers = 0 if config.training_mode == "pretrain" else 4 + action_config = ActionExpertConfig( + dim=config.action_expert_dim, + ffn_dim=config.action_expert_dim * config.action_expert_ffn_dim_multiplier, + num_layers=config.num_layers, + state_dim=config.action_state_dim, + action_dim=config.action_dim, + chunk_size=action_chunk_size_for_expert, + num_registers=num_registers, + video_feature_dim=wan_dim, + causal=False, + eps=config.action_expert_norm_eps, + training_mode=config.training_mode, + ) + self.action_expert = ActionExpert(action_config, wan_config) + + und_config = UndExpertConfig( + dim=config.und_expert_hidden_size, + ffn_dim=config.und_expert_hidden_size * config.und_expert_ffn_dim_multiplier, + num_layers=config.num_layers, + vlm_input_dim=config.vlm_adapter_input_dim, + vlm_projector_type=config.vlm_adapter_projector_type, + eps=config.und_expert_norm_eps, + ) + self.und_expert = UndExpert(und_config, wan_config, vlm_config) + self.image_context_adapter = nn.Sequential( + nn.Linear(vlm_dim, wan_dim), + nn.GELU(approximate="tanh"), + nn.Linear(wan_dim, wan_dim), + ) + + self.device = next(self.video_model.parameters()).device + self.action_expert.to(device=self.device, dtype=self.dtype) + self.und_expert.to(device=self.device, dtype=self.dtype) + self.image_context_adapter.to(device=self.device, dtype=self.dtype) + 
self.action_expert.time_embedding.to(dtype=torch.float32) + self.action_expert.time_projection.to(dtype=torch.float32) + + lat_t = 1 + config.num_video_frames // 4 + lat_h = config.video_height // 32 + lat_w = config.video_width // 32 + self.grid_sizes = torch.tensor([lat_t, lat_h, lat_w], dtype=torch.long, device=self.device).unsqueeze(0).expand(config.batch_size, -1) + self.video_module = VideoModule(self.video_model, self.dtype, self.device, self.grid_sizes) + self.und_module = UndModule(self.vlm_model, self.und_expert, self.config, self.dtype, self.device, image_context_adapter=self.image_context_adapter) + self.action_module = ActionModule(self.action_expert, self.config, self.video_model, self.vlm_model, self.dtype, self.device) + + def load_checkpoint(self, path: str, strict: bool = True) -> Dict: + checkpoint_path = Path(path) + if checkpoint_path.is_dir(): + checkpoint_file = checkpoint_path / "mp_rank_00_model_states.pt" + if not checkpoint_file.exists(): + raise FileNotFoundError(f"Checkpoint file not found: {checkpoint_file}") + path = str(checkpoint_file) + checkpoint = torch.load(path, map_location="cpu") + state_dict = checkpoint["module"] + self.load_state_dict(state_dict, strict=strict) + return {key: value for key, value in checkpoint.items() if key not in ["module", "config"]} diff --git a/lightx2v/models/networks/motus/image_utils.py b/lightx2v/models/networks/motus/image_utils.py new file mode 100644 index 000000000..f4f0ab1d1 --- /dev/null +++ b/lightx2v/models/networks/motus/image_utils.py @@ -0,0 +1,19 @@ +import cv2 +import numpy as np + + +def resize_with_padding(frame: np.ndarray, target_size: tuple[int, int]) -> np.ndarray: + target_height, target_width = target_size + original_height, original_width = frame.shape[:2] + + scale = min(target_height / original_height, target_width / original_width) + new_height = int(original_height * scale) + new_width = int(original_width * scale) + + resized_frame = cv2.resize(frame, (new_width, new_height)) + padded_frame = np.zeros((target_height, target_width, frame.shape[2]), dtype=frame.dtype) + + y_offset = (target_height - new_height) // 2 + x_offset = (target_width - new_width) // 2 + padded_frame[y_offset : y_offset + new_height, x_offset : x_offset + new_width] = resized_frame + return padded_frame diff --git a/lightx2v/models/networks/motus/infer/__init__.py b/lightx2v/models/networks/motus/infer/__init__.py new file mode 100644 index 000000000..9279bcbda --- /dev/null +++ b/lightx2v/models/networks/motus/infer/__init__.py @@ -0,0 +1,5 @@ +from .post_infer import MotusPostInfer +from .pre_infer import MotusPreInfer +from .transformer_infer import MotusTransformerInfer + +__all__ = ["MotusPreInfer", "MotusTransformerInfer", "MotusPostInfer"] diff --git a/lightx2v/models/networks/motus/infer/module_io.py b/lightx2v/models/networks/motus/infer/module_io.py new file mode 100644 index 000000000..6f2df77ac --- /dev/null +++ b/lightx2v/models/networks/motus/infer/module_io.py @@ -0,0 +1,24 @@ +from dataclasses import dataclass +from typing import Any + +import torch + + +@dataclass +class MotusPreInferModuleOutput: + first_frame: torch.Tensor + state: torch.Tensor + instruction: str + t5_embeddings: list[torch.Tensor] + vlm_inputs: list[dict[str, Any]] + processed_t5_context: torch.Tensor + image_context: torch.Tensor | None + und_tokens: torch.Tensor + condition_frame_latent: torch.Tensor + grid_sizes: torch.Tensor + + +@dataclass +class MotusPostInferModuleOutput: + pred_frames: torch.Tensor + pred_actions: torch.Tensor 
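The two dataclasses above fix the I/O contract between the pre-infer, transformer-infer, and post-infer stages that model.py wires together below. As a minimal end-to-end sketch of how MotusModel drives them (illustrative only, not part of the patch; the /path/to placeholders, the model_path value, and the prompt text are assumptions, while the example inputs ship in scripts/motus/example_inputs):

import json

import numpy as np

from lightx2v.models.networks.motus.model import MotusModel

# Load the shipped config as a plain dict and point it at local checkpoints.
with open("configs/motus/motus_i2v.json") as f:
    config = json.load(f)
config["checkpoint_path"] = "/path/to/MotusModel"
config["wan_path"] = "/path/to/Wan2.2-TI2V-5B"
config["vlm_path"] = "/path/to/Qwen3-VL-2B-Instruct"
config["model_path"] = "/path/to/Motus"  # assumed: repo root holding utils/stat.json

model = MotusModel(config, device="cuda")

# state.npy carries the robot state vector (action_state_dim = 14 floats).
state = np.load("scripts/motus/example_inputs/state.npy")

# infer() chains MotusPreInfer -> MotusTransformerInfer -> MotusPostInfer and
# returns frames of shape [T, C, H, W] in [0, 1] plus denormalized actions.
frames, actions = model.infer(
    image_path="scripts/motus/example_inputs/first_frame.png",
    prompt="pick up the block",  # hypothetical instruction
    state_value=state,
    num_inference_steps=config["num_inference_steps"],
    seed=42,
)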
diff --git a/lightx2v/models/networks/motus/infer/post_infer.py b/lightx2v/models/networks/motus/infer/post_infer.py new file mode 100644 index 000000000..01efc9d94 --- /dev/null +++ b/lightx2v/models/networks/motus/infer/post_infer.py @@ -0,0 +1,20 @@ +import torch + +from .module_io import MotusPostInferModuleOutput + + +class MotusPostInfer: + def __init__(self, adapter, config): + self.adapter = adapter + self.config = config + self.scheduler = None + + def set_scheduler(self, scheduler): + self.scheduler = scheduler + + @torch.no_grad() + def infer(self, video_latents: torch.Tensor, action_latents: torch.Tensor): + decoded_frames = self.adapter.model.video_model.decode_video(video_latents) + pred_frames = ((decoded_frames[:, :, 1:] + 1.0) / 2.0).clamp(0, 1).float() + pred_actions = self.adapter.denormalize_actions(action_latents.float()) + return MotusPostInferModuleOutput(pred_frames=pred_frames, pred_actions=pred_actions) diff --git a/lightx2v/models/networks/motus/infer/pre_infer.py b/lightx2v/models/networks/motus/infer/pre_infer.py new file mode 100644 index 000000000..ab5cfd1c0 --- /dev/null +++ b/lightx2v/models/networks/motus/infer/pre_infer.py @@ -0,0 +1,49 @@ +import torch + +from .module_io import MotusPreInferModuleOutput + + +class MotusPreInfer: + def __init__(self, adapter, config): + self.adapter = adapter + self.config = config + self.scheduler = None + + def set_scheduler(self, scheduler): + self.scheduler = scheduler + + @torch.no_grad() + def infer(self, image_path: str, prompt: str, state_value, seed: int | None = None): + if self.scheduler is None: + raise RuntimeError("MotusPreInfer requires a scheduler before infer().") + + first_frame = self.adapter.prepare_frame(image_path) + state = self.adapter.prepare_state(state_value) + instruction = self.adapter.build_instruction(prompt) + t5_embeddings = self.adapter.build_t5_embeddings(instruction) + vlm_inputs = [self.adapter.build_vlm_inputs(instruction, first_frame)] + condition_frame_latent = self.adapter.encode_condition_frame(first_frame) + processed_t5_context = self.adapter.model.video_module.preprocess_t5_embeddings(t5_embeddings) + und_tokens = self.adapter.model.und_module.extract_und_features(vlm_inputs) + image_context = self.adapter.model.und_module.extract_image_context(vlm_inputs) + + self.scheduler.prepare( + seed=seed, + condition_frame_latent=condition_frame_latent, + action_shape=(state.shape[0], self.adapter.model.config.action_chunk_size, self.adapter.model.config.action_dim), + dtype=self.adapter.model.dtype, + device=self.adapter.device, + ) + + return MotusPreInferModuleOutput( + first_frame=first_frame, + state=state, + instruction=instruction, + t5_embeddings=t5_embeddings, + vlm_inputs=vlm_inputs, + processed_t5_context=processed_t5_context, + image_context=image_context, + und_tokens=und_tokens, + condition_frame_latent=condition_frame_latent, + grid_sizes=self.adapter.model.grid_sizes[: state.shape[0]], + ) diff --git a/lightx2v/models/networks/motus/infer/transformer_infer.py b/lightx2v/models/networks/motus/infer/transformer_infer.py new file mode 100644 index 000000000..2213d4fa7 --- /dev/null +++ b/lightx2v/models/networks/motus/infer/transformer_infer.py @@ -0,0 +1,125 @@ +import torch + +from lightx2v.common.transformer_infer.transformer_infer import BaseTransformerInfer + +from ..ops import RegistryAttention + + +class MotusTransformerInfer(BaseTransformerInfer): + def __init__(self, adapter, config): + self.adapter = adapter + self.config = config + self.self_attn_1_type = 
config.get("self_attn_1_type", config.get("attention_type", "flash_attn2")) + self.self_attn_2_type = config.get("self_attn_2_type", config.get("attention_type", "flash_attn2")) + self.cross_attn_1_type = config.get("cross_attn_1_type", config.get("attention_type", "flash_attn2")) + self.self_attn = RegistryAttention(self.self_attn_1_type) + self.joint_self_attn = RegistryAttention(self.self_attn_2_type) + self.cross_attn = RegistryAttention(self.cross_attn_1_type) + + def _joint_attention(self, pre_infer_out, video_tokens, action_tokens, und_tokens, video_adaln_modulation, action_adaln_modulation, layer_idx): + model = self.adapter.model + wan_layer = model.video_module.video_model.wan_model.blocks[layer_idx] + action_block = model.action_expert.blocks[layer_idx] + und_block = model.und_expert.blocks[layer_idx] + + v_mod = video_adaln_modulation + a_mod = action_adaln_modulation + norm_video = wan_layer.norm1(video_tokens).float() * (1 + v_mod[1].squeeze(2)) + v_mod[0].squeeze(2) + norm_action = action_block.norm1(action_tokens).float() * (1 + a_mod[1].squeeze(2)) + a_mod[0].squeeze(2) + norm_und = und_block.norm1(und_tokens) + + batch, video_len, video_dim = norm_video.shape + action_len = norm_action.shape[1] + und_len = norm_und.shape[1] + num_heads = model.video_model.wan_model.num_heads + head_dim = video_dim // num_heads + + video_q = wan_layer.self_attn.norm_q(wan_layer.self_attn.q(norm_video)).view(batch, video_len, num_heads, head_dim) + video_k = wan_layer.self_attn.norm_k(wan_layer.self_attn.k(norm_video)).view(batch, video_len, num_heads, head_dim) + video_v = wan_layer.self_attn.v(norm_video).view(batch, video_len, num_heads, head_dim) + freqs = self.adapter.get_wan_freqs() + video_q = self.adapter.rope_apply(video_q, pre_infer_out.grid_sizes, freqs) + video_k = self.adapter.rope_apply(video_k, pre_infer_out.grid_sizes, freqs) + + action_q, action_k, action_v = action_block.wan_action_qkv_mm(norm_action) + action_q = action_block.wan_action_norm_q(action_q.flatten(-2)).view(batch, action_len, num_heads, head_dim) + action_k = action_block.wan_action_norm_k(action_k.flatten(-2)).view(batch, action_len, num_heads, head_dim) + + und_q, und_k, und_v = und_block.wan_und_qkv_mm(norm_und) + und_q = und_block.wan_und_norm_q(und_q.flatten(-2)).view(batch, und_len, num_heads, head_dim) + und_k = und_block.wan_und_norm_k(und_k.flatten(-2)).view(batch, und_len, num_heads, head_dim) + + q_all = torch.cat([video_q, action_q, und_q], dim=1) + k_all = torch.cat([video_k, action_k, und_k], dim=1) + v_all = torch.cat([video_v, action_v, und_v], dim=1) + attn_out = self.joint_self_attn(q_all, k_all, v_all) + + video_out = wan_layer.self_attn.o(attn_out[:, :video_len, :]) + action_out = action_block.wan_action_o(attn_out[:, video_len : video_len + action_len, :]) + und_out = und_block.wan_und_o(attn_out[:, video_len + action_len :, :]) + + video_tokens = video_tokens + video_out * v_mod[2].squeeze(2) + action_tokens = action_tokens + action_out * a_mod[2].squeeze(2) + und_tokens = und_tokens + und_out + return video_tokens, action_tokens, und_tokens + + def _cross_attention(self, video_tokens, processed_t5_context, layer_idx): + wan_layer = self.adapter.model.video_module.video_model.wan_model.blocks[layer_idx] + batch, q_len, dim = video_tokens.shape + ctx_len = processed_t5_context.shape[1] + num_heads = wan_layer.cross_attn.num_heads + head_dim = dim // num_heads + + norm_video = wan_layer.norm3(video_tokens) + q = wan_layer.cross_attn.norm_q(wan_layer.cross_attn.q(norm_video)).view(batch, 
q_len, num_heads, head_dim) + k = wan_layer.cross_attn.norm_k(wan_layer.cross_attn.k(processed_t5_context)).view(batch, ctx_len, num_heads, head_dim) + v = wan_layer.cross_attn.v(processed_t5_context).view(batch, ctx_len, num_heads, head_dim) + return video_tokens + wan_layer.cross_attn.o(self.cross_attn(q, k, v)) + + @torch.no_grad() + def infer(self, weights, pre_infer_out): + model = self.adapter.model + scheduler = self.scheduler + processed_t5_context = pre_infer_out.processed_t5_context + image_context = pre_infer_out.image_context + und_tokens_base = pre_infer_out.und_tokens + + for step_index, t, t_next, dt in scheduler.iter_steps(): + scheduler.step_pre(step_index) + video_tokens = model.video_module.prepare_input(scheduler.video_latents.to(model.dtype)) + state_tokens = pre_infer_out.state.unsqueeze(1).to(model.dtype) + registers = model.action_expert.registers.expand(state_tokens.shape[0], -1, -1) + action_tokens = model.action_expert.input_encoder(state_tokens, scheduler.action_latents, registers) + und_tokens = und_tokens_base.clone() + + video_t_scaled = (t * 1000).expand(state_tokens.shape[0]).to(model.dtype) + action_t_scaled = (t * 1000).expand(state_tokens.shape[0]).to(model.dtype) + + with torch.autocast(device_type="cuda", dtype=model.video_model.precision): + video_head_time_emb, video_adaln_params = model.video_module.get_time_embedding(video_t_scaled, video_tokens.shape[1]) + action_head_time_emb, action_adaln_params = model.action_module.get_time_embedding(action_t_scaled, action_tokens.shape[1]) + + for layer_idx in range(model.config.num_layers): + video_adaln_modulation = model.video_module.compute_adaln_modulation(video_adaln_params, layer_idx) + action_adaln_modulation = model.action_module.compute_adaln_modulation(action_adaln_params, layer_idx) + video_tokens, action_tokens, und_tokens = self._joint_attention( + pre_infer_out, + video_tokens, + action_tokens, + und_tokens, + video_adaln_modulation, + action_adaln_modulation, + layer_idx, + ) + video_tokens = self._cross_attention(video_tokens, processed_t5_context, layer_idx) + video_tokens = model.video_module.process_ffn(video_tokens, video_adaln_modulation, layer_idx) + action_tokens = model.action_module.process_ffn(action_tokens, action_adaln_modulation, layer_idx) + und_tokens = model.und_module.process_ffn(und_tokens, layer_idx) + + video_velocity = model.video_module.apply_output_head(video_tokens, video_head_time_emb) + action_pred_full = model.action_expert.decoder(action_tokens, action_head_time_emb) + action_velocity = action_pred_full[:, 1 : -model.action_expert.config.num_registers, :] + + scheduler.step(video_velocity=video_velocity, action_velocity=action_velocity, dt=dt, condition_frame_latent=pre_infer_out.condition_frame_latent) + + return scheduler.video_latents, scheduler.action_latents diff --git a/lightx2v/models/networks/motus/model.py b/lightx2v/models/networks/motus/model.py new file mode 100644 index 000000000..9a23b44d3 --- /dev/null +++ b/lightx2v/models/networks/motus/model.py @@ -0,0 +1,311 @@ +import inspect +import json +import os +from pathlib import Path +from typing import Any + +import numpy as np +import torch +import torch.nn as nn +from PIL import Image +from loguru import logger +from transformers import AutoProcessor +from tqdm import tqdm + +from lightx2v.models.networks.motus.ops import LinearWithMM, TripleQKVProjector +from lightx2v.models.networks.motus.core import Motus, MotusConfig +from lightx2v.models.networks.motus.image_utils import resize_with_padding +from 
lightx2v.models.networks.motus.primitives import rope_apply +from lightx2v.models.networks.motus.t5 import T5EncoderModel +from lightx2v.models.networks.motus.infer.post_infer import MotusPostInfer +from lightx2v.models.networks.motus.infer.pre_infer import MotusPreInfer +from lightx2v.models.networks.motus.infer.transformer_infer import MotusTransformerInfer +from lightx2v.models.schedulers.motus.scheduler import MotusScheduler + + +class MotusModel: + """Thin LightX2V wrapper over Motus native inference.""" + + def __init__(self, config, device): + self.config = config + self.device = device + self.motus_root = Path(config.get("model_path", "")).expanduser().resolve() + if not self.motus_root.exists(): + raise FileNotFoundError(f"Motus root not found: {self.motus_root}") + + self._motus_cls = Motus + self._motus_config_cls = MotusConfig + self._resize_with_padding = resize_with_padding + self._rope_apply = rope_apply + self._t5_encoder_cls = T5EncoderModel + + self.model = self._load_model().eval() + self.t5_encoder = self._load_t5_encoder() + self.vlm_processor = self._load_vlm_processor() + self._load_normalization_stats() + self._build_native_stack() + + def _build_native_stack(self): + self.scheduler = MotusScheduler(self.config) + self.pre_infer = MotusPreInfer(self, self.config) + self.transformer_infer = MotusTransformerInfer(self, self.config) + self.post_infer = MotusPostInfer(self, self.config) + + self.pre_infer.set_scheduler(self.scheduler) + self.transformer_infer.set_scheduler(self.scheduler) + self.post_infer.set_scheduler(self.scheduler) + + def _build_model_config(self): + return self._motus_config_cls( + wan_checkpoint_path=self.config["wan_path"], + vae_path=os.path.join(self.config["wan_path"], "Wan2.2_VAE.pth"), + wan_config_path=self.config["wan_path"], + video_precision=self.config.get("video_precision", "bfloat16"), + vlm_checkpoint_path=self.config["vlm_path"], + und_expert_hidden_size=self.config.get("und_expert_hidden_size", 512), + und_expert_ffn_dim_multiplier=self.config.get("und_expert_ffn_dim_multiplier", 4), + und_expert_norm_eps=self.config.get("und_expert_norm_eps", 1e-5), + und_layers_to_extract=self.config.get("und_layers_to_extract"), + vlm_adapter_input_dim=self.config.get("vlm_adapter_input_dim", 2048), + vlm_adapter_projector_type=self.config.get("vlm_adapter_projector_type", "mlp3x_silu"), + num_layers=self.config.get("num_layers", 30), + action_state_dim=self.config.get("action_state_dim", 14), + action_dim=self.config.get("action_dim", 14), + action_expert_dim=self.config.get("action_expert_dim", 1024), + action_expert_ffn_dim_multiplier=self.config.get("action_expert_ffn_dim_multiplier", 4), + action_expert_norm_eps=self.config.get("action_expert_norm_eps", 1e-6), + global_downsample_rate=self.config.get("global_downsample_rate", 3), + video_action_freq_ratio=self.config.get("video_action_freq_ratio", 2), + num_video_frames=self.config.get("num_video_frames", 8), + video_height=self.config.get("video_height", 384), + video_width=self.config.get("video_width", 320), + batch_size=1, + training_mode=self.config.get("training_mode", "finetune"), + load_pretrained_backbones=self.config.get("load_pretrained_backbones", False), + ) + + def _load_model(self): + logger.info("Loading Motus model") + model = self._motus_cls(self._build_model_config()) + self._patch_qwen3_vl_rope_index(model) + model.to(self.device) + model.load_checkpoint(self.config["checkpoint_path"], strict=False) + self._apply_lightx2v_patches(model) + return model + + def 
_load_t5_encoder(self): + return self._t5_encoder_cls( + text_len=512, + dtype=torch.bfloat16, + device=str(self.device), + checkpoint_path=os.path.join(self.config["wan_path"], "models_t5_umt5-xxl-enc-bf16.pth"), + tokenizer_path=os.path.join(self.config["wan_path"], "google", "umt5-xxl"), + ) + + def _load_vlm_processor(self): + return AutoProcessor.from_pretrained(self.config["vlm_path"], trust_remote_code=True) + + def _patch_qwen3_vl_rope_index(self, root: Any): + visited = set() + + def walk(obj: Any): + obj_id = id(obj) + if obj is None or obj_id in visited: + return + visited.add(obj_id) + + method = getattr(obj, "get_rope_index", None) + if callable(method): + try: + signature = inspect.signature(method) + except (TypeError, ValueError): + signature = None + + if signature and "mm_token_type_ids" in signature.parameters: + def wrapped_get_rope_index(*args, __orig=method, **kwargs): + if "mm_token_type_ids" not in kwargs: + input_ids = kwargs.get("input_ids") + if input_ids is None and args: + input_ids = args[0] + if torch.is_tensor(input_ids): + kwargs["mm_token_type_ids"] = torch.zeros_like(input_ids, dtype=torch.long) + return __orig(*args, **kwargs) + + setattr(obj, "get_rope_index", wrapped_get_rope_index) + + if isinstance(obj, torch.nn.Module): + for child in obj.children(): + walk(child) + + for attr in ("model", "language_model", "visual", "vlm", "backbone"): + child = getattr(obj, attr, None) + if child is not None and child is not obj: + walk(child) + + walk(root) + + def _load_normalization_stats(self): + stat_path = self.motus_root / "utils" / "stat.json" + if stat_path.exists(): + with open(stat_path, "r") as f: + stat_data = json.load(f) + stats = stat_data.get(self.config.get("stats_key", "robotwin2"), {}) + if stats: + self.action_min = torch.tensor(stats["min"], dtype=torch.float32, device=self.device) + self.action_max = torch.tensor(stats["max"], dtype=torch.float32, device=self.device) + self.action_range = self.action_max - self.action_min + return + + action_dim = self.config.get("action_dim", 14) + self.action_min = torch.zeros(action_dim, dtype=torch.float32, device=self.device) + self.action_max = torch.ones(action_dim, dtype=torch.float32, device=self.device) + self.action_range = torch.ones(action_dim, dtype=torch.float32, device=self.device) + + def _quant_flags(self): + quantized = bool(self.config.get("motus_quantized", self.config.get("dit_quantized", False))) + quant_scheme = self.config.get("motus_quant_scheme", self.config.get("dit_quant_scheme", "Default")) + return quantized, quant_scheme + + def _replace_linear_modules(self, module): + quantized, quant_scheme = self._quant_flags() + for name, child in list(module.named_children()): + if isinstance(child, nn.Linear): + setattr( + module, + name, + LinearWithMM.from_linear( + child, + quant_scheme=quant_scheme, + quantized=quantized, + config=self.config, + ), + ) + else: + self._replace_linear_modules(child) + + def _attach_qkv_projectors(self, model): + quantized, quant_scheme = self._quant_flags() + for block in model.action_expert.blocks: + block.wan_action_qkv_mm = TripleQKVProjector( + block.wan_action_qkv.detach(), + quant_scheme=quant_scheme, + quantized=quantized, + config=self.config, + ) + block.wan_action_o = LinearWithMM.from_linear( + block.wan_action_o, + quant_scheme=quant_scheme, + quantized=quantized, + config=self.config, + ) + + for block in model.und_expert.blocks: + block.wan_und_qkv_mm = TripleQKVProjector( + block.wan_und_qkv.detach(), + quant_scheme=quant_scheme, + 
quantized=quantized, + config=self.config, + ) + block.wan_und_o = LinearWithMM.from_linear( + block.wan_und_o, + quant_scheme=quant_scheme, + quantized=quantized, + config=self.config, + ) + + def _apply_lightx2v_patches(self, model): + self._replace_linear_modules(model.action_expert) + self._replace_linear_modules(model.und_expert) + self._attach_qkv_projectors(model) + + def denormalize_actions(self, actions: torch.Tensor) -> torch.Tensor: + shape = actions.shape + flat = actions.reshape(-1, shape[-1]) + restored = flat * self.action_range.unsqueeze(0) + self.action_min.unsqueeze(0) + return restored.reshape(shape) + + def rope_apply(self, q: torch.Tensor, grid_sizes: torch.Tensor, freqs: torch.Tensor) -> torch.Tensor: + return self._rope_apply(q, grid_sizes, freqs) + + def get_wan_freqs(self) -> torch.Tensor: + freqs = self.model.video_model.wan_model.freqs + if freqs.device != self.device: + freqs = freqs.to(self.device) + return freqs + + def prepare_frame(self, image_path: str) -> torch.Tensor: + image = Image.open(image_path).convert("RGB") + image_np = np.asarray(image).astype(np.float32) / 255.0 + resized_np = self._resize_with_padding( + image_np, + (self.config.get("video_height", 384), self.config.get("video_width", 320)), + ) + if resized_np.dtype == np.uint8: + resized_np = resized_np.astype(np.float32) / 255.0 + return torch.from_numpy(resized_np).permute(2, 0, 1).unsqueeze(0).to(self.device) + + def prepare_state(self, state_value) -> torch.Tensor: + if isinstance(state_value, torch.Tensor): + state = state_value.float() + else: + state = torch.tensor(state_value, dtype=torch.float32) + if state.dim() == 1: + state = state.unsqueeze(0) + return state.to(self.device) + + def build_instruction(self, prompt: str) -> str: + prefix = self.config.get( + "scene_prefix", + "The whole scene is in a realistic, industrial art style with three views: " + "a fixed rear camera, a movable left arm camera, and a movable right arm camera. 
" + "The aloha robot is currently performing the following task: ", + ) + return f"{prefix}{prompt}" + + def build_t5_embeddings(self, instruction: str): + t5_out = self.t5_encoder([instruction], str(self.device)) + if isinstance(t5_out, torch.Tensor): + return [t5_out.squeeze(0)] if t5_out.dim() == 3 else [t5_out] + return t5_out + + def _tensor_to_pil(self, tensor: torch.Tensor) -> Image.Image: + tensor = tensor.float().clamp(0, 1) + np_img = (tensor.permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8) + return Image.fromarray(np_img, mode="RGB") + + def build_vlm_inputs(self, instruction: str, first_frame: torch.Tensor): + image = self._tensor_to_pil(first_frame.squeeze(0)) + messages = [{"role": "user", "content": [{"type": "text", "text": instruction}, {"type": "image", "image": image}]}] + text = self.vlm_processor.apply_chat_template(messages, add_generation_prompt=False, tokenize=False) + encoded = self.vlm_processor(text=[text], images=[image], return_tensors="pt") + + vlm_inputs = {} + for key in ("input_ids", "attention_mask", "pixel_values", "image_grid_thw", "video_grid_thw", "second_per_grid_ts", "mm_token_type_ids"): + value = encoded.get(key) + if torch.is_tensor(value): + vlm_inputs[key] = value.to(self.device) + elif value is not None: + vlm_inputs[key] = value + + if "mm_token_type_ids" not in vlm_inputs and "input_ids" in vlm_inputs: + vlm_inputs["mm_token_type_ids"] = torch.zeros_like(vlm_inputs["input_ids"], dtype=torch.long) + return vlm_inputs + + @torch.no_grad() + def encode_condition_frame(self, first_frame: torch.Tensor): + first_frame_norm = (first_frame * 2.0 - 1.0).unsqueeze(2) + return self.model.video_model.encode_video(first_frame_norm.to(self.model.dtype)) + + @torch.no_grad() + def infer(self, image_path: str, prompt: str, state_value, num_inference_steps: int, seed: int | None = None): + self.scheduler.infer_steps = num_inference_steps + pre_infer_out = self.pre_infer.infer(image_path=image_path, prompt=prompt, state_value=state_value, seed=seed) + video_latents, action_latents = self.transformer_infer.infer(None, pre_infer_out) + post_infer_out = self.post_infer.infer(video_latents, action_latents) + + pred_frames = post_infer_out.pred_frames + if pred_frames.dim() == 5: + if pred_frames.shape[1] == 3: + pred_frames = pred_frames.permute(0, 2, 1, 3, 4) + pred_frames = pred_frames.squeeze(0) + return pred_frames, post_infer_out.pred_actions.squeeze(0) diff --git a/lightx2v/models/networks/motus/ops.py b/lightx2v/models/networks/motus/ops.py new file mode 100644 index 000000000..507e98033 --- /dev/null +++ b/lightx2v/models/networks/motus/ops.py @@ -0,0 +1,160 @@ +import copy + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from lightx2v.utils.registry_factory import ATTN_WEIGHT_REGISTER, MM_WEIGHT_REGISTER + + +class LinearWithMM(nn.Module): + """nn.Linear-compatible module with optional LightX2V MM backend.""" + + def __init__(self, in_features, out_features, bias=True, quant_scheme="Default", quantized=False, config=None): + super().__init__() + self.in_features = in_features + self.out_features = out_features + self.weight = nn.Parameter(torch.empty(out_features, in_features)) + if bias: + self.bias = nn.Parameter(torch.empty(out_features)) + else: + self.register_parameter("bias", None) + self.quant_scheme = quant_scheme + self.quantized = quantized and quant_scheme != "Default" + self.config = copy.deepcopy(config or {}) + self.mm = None + + @classmethod + def from_linear(cls, linear: nn.Linear, quant_scheme="Default", 
quantized=False, config=None): + module = cls( + linear.in_features, + linear.out_features, + bias=linear.bias is not None, + quant_scheme=quant_scheme, + quantized=quantized, + config=config, + ) + with torch.no_grad(): + module.weight.copy_(linear.weight.detach()) + if linear.bias is not None: + module.bias.copy_(linear.bias.detach()) + module = module.to(device=linear.weight.device, dtype=linear.weight.dtype) + module._build_mm() + return module + + @classmethod + def from_tensor(cls, weight: torch.Tensor, bias: torch.Tensor | None = None, quant_scheme="Default", quantized=False, config=None): + out_features, in_features = weight.shape + module = cls( + in_features, + out_features, + bias=bias is not None, + quant_scheme=quant_scheme, + quantized=quantized, + config=config, + ) + with torch.no_grad(): + module.weight.copy_(weight.detach()) + if bias is not None: + module.bias.copy_(bias.detach()) + module = module.to(device=weight.device, dtype=weight.dtype) + module._build_mm() + return module + + def _build_mm(self): + scheme = self.quant_scheme if self.quantized else "Default" + self.mm = MM_WEIGHT_REGISTER[scheme]("__motus_weight__", "__motus_bias__" if self.bias is not None else None) + if hasattr(self.mm, "set_config"): + cfg = copy.deepcopy(self.config) + if self.quantized: + cfg["dit_quantized"] = True + cfg["dit_quant_scheme"] = self.quant_scheme + cfg.setdefault("weight_auto_quant", True) + self.mm.set_config(cfg) + weight_dict = {"__motus_weight__": self.weight.detach()} + if self.bias is not None: + weight_dict["__motus_bias__"] = self.bias.detach() + self.mm.load(weight_dict) + + def _mm_apply(self, x): + if self.mm is None: + self._build_mm() + x2d = x.reshape(-1, x.shape[-1]) + y2d = self.mm.apply(x2d.to(self.weight.dtype)) + if y2d.dtype != x.dtype: + y2d = y2d.to(x.dtype) + return y2d.reshape(*x.shape[:-1], self.out_features) + + def forward(self, x): + if not self.quantized: + return F.linear(x, self.weight, self.bias) + return self._mm_apply(x) + + +class TripleQKVProjector(nn.Module): + """Three-way linear projection for q/k/v from a packed tensor.""" + + def __init__(self, packed_qkv: torch.Tensor, quant_scheme="Default", quantized=False, config=None): + super().__init__() + assert packed_qkv.dim() == 4 + self.num_heads = packed_qkv.shape[1] + self.in_features = packed_qkv.shape[2] + self.head_dim = packed_qkv.shape[3] + self.out_features = self.num_heads * self.head_dim + + q_w = packed_qkv[0].permute(0, 2, 1).reshape(self.out_features, self.in_features).contiguous() + k_w = packed_qkv[1].permute(0, 2, 1).reshape(self.out_features, self.in_features).contiguous() + v_w = packed_qkv[2].permute(0, 2, 1).reshape(self.out_features, self.in_features).contiguous() + + self.q = LinearWithMM.from_tensor(q_w, None, quant_scheme=quant_scheme, quantized=quantized, config=config) + self.k = LinearWithMM.from_tensor(k_w, None, quant_scheme=quant_scheme, quantized=quantized, config=config) + self.v = LinearWithMM.from_tensor(v_w, None, quant_scheme=quant_scheme, quantized=quantized, config=config) + + def forward(self, x): + q = self.q(x).reshape(*x.shape[:-1], self.num_heads, self.head_dim) + k = self.k(x).reshape(*x.shape[:-1], self.num_heads, self.head_dim) + v = self.v(x).reshape(*x.shape[:-1], self.num_heads, self.head_dim) + return q, k, v + + +class RegistryAttention(nn.Module): + """LightX2V attention-kernel wrapper with Wan-style varlen arguments.""" + + def __init__(self, attn_type: str): + super().__init__() + self.attn_type = attn_type + self.kernel = 
ATTN_WEIGHT_REGISTER[attn_type]() + + def _build_cu_seqlens(self, batch: int, seq_len: int, device: torch.device): + return torch.arange(0, (batch + 1) * seq_len, seq_len, dtype=torch.int32, device=device) + + def _normalize_dtype(self, tensor: torch.Tensor) -> torch.Tensor: + if tensor.dtype in (torch.float16, torch.bfloat16): + return tensor + if tensor.device.type == "cuda": + return tensor.to(torch.bfloat16) + return tensor.to(torch.float32) + + def forward(self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, causal: bool = False): + if q.dim() != 4 or k.dim() != 4 or v.dim() != 4: + raise ValueError("RegistryAttention expects q/k/v with shape [B, L, H, D].") + + q = self._normalize_dtype(q) + k = self._normalize_dtype(k) + v = self._normalize_dtype(v) + + batch, q_len = q.shape[:2] + kv_len = k.shape[1] + out = self.kernel.apply( + q=q, + k=k, + v=v, + causal=causal, + cu_seqlens_q=self._build_cu_seqlens(batch, q_len, q.device), + cu_seqlens_kv=self._build_cu_seqlens(batch, kv_len, k.device), + max_seqlen_q=q_len, + max_seqlen_kv=kv_len, + ) + if out.dim() == 2: + out = out.view(batch, q_len, -1) + return out diff --git a/lightx2v/models/networks/motus/primitives.py b/lightx2v/models/networks/motus/primitives.py new file mode 100644 index 000000000..7e5d642bb --- /dev/null +++ b/lightx2v/models/networks/motus/primitives.py @@ -0,0 +1,68 @@ +from functools import lru_cache + +import torch +import torch.nn as nn + + +def sinusoidal_embedding_1d(dim, position): + assert dim % 2 == 0 + half = dim // 2 + position = position.type(torch.float64) + sinusoid = torch.outer(position, torch.pow(10000, -torch.arange(half).to(position).div(half))) + return torch.cat([torch.cos(sinusoid), torch.sin(sinusoid)], dim=1) + + +@torch.amp.autocast("cuda", enabled=False) +def rope_apply(x: torch.Tensor, grid_sizes: torch.Tensor, freqs: torch.Tensor) -> torch.Tensor: + batch, seq, heads, complex_twice_dim = x.shape + assert complex_twice_dim % 2 == 0 + complex_dim = complex_twice_dim // 2 + + c_f = complex_dim - 2 * (complex_dim // 3) + c_h = complex_dim // 3 + c_w = complex_dim // 3 + fpart, hpart, wpart = freqs.split([c_f, c_h, c_w], dim=1) + + x_c = torch.view_as_complex(x.to(torch.float64).reshape(batch, seq, heads, -1, 2)).contiguous() + y_c = x_c.clone() + gsz = grid_sizes.to(torch.long) + uniq, inv = torch.unique(gsz, dim=0, return_inverse=True) + + @lru_cache(maxsize=256) + def _make_freq_grid(f: int, h: int, w: int): + return torch.cat( + [ + fpart[:f].view(f, 1, 1, -1).expand(f, h, w, -1), + hpart[:h].view(1, h, 1, -1).expand(f, h, w, -1), + wpart[:w].view(1, 1, w, -1).expand(f, h, w, -1), + ], + dim=-1, + ).reshape(f * h * w, 1, -1).contiguous() + + for g_idx, (f, h, w) in enumerate(uniq.tolist()): + idx = (inv == g_idx).nonzero(as_tuple=False).squeeze(-1) + if idx.numel() == 0: + continue + seq_len = f * h * w + freq_grid = _make_freq_grid(f, h, w) + y_c[idx, :seq_len] = x_c[idx, :seq_len] * freq_grid + + return torch.view_as_real(y_c).reshape(batch, seq, heads, -1).float() + + +class WanRMSNorm(nn.Module): + def __init__(self, dim, eps=1e-5): + super().__init__() + self.eps = eps + self.weight = nn.Parameter(torch.ones(dim)) + + def forward(self, x): + return (x.float() * torch.rsqrt(x.float().pow(2).mean(dim=-1, keepdim=True) + self.eps)).type_as(x) * self.weight + + +class WanLayerNorm(nn.LayerNorm): + def __init__(self, dim, eps=1e-6, elementwise_affine=False): + super().__init__(dim, elementwise_affine=elementwise_affine, eps=eps) + + def forward(self, x): + return 
super().forward(x.float()).type_as(x) diff --git a/lightx2v/models/networks/motus/t5.py b/lightx2v/models/networks/motus/t5.py new file mode 100644 index 000000000..31c4e77ef --- /dev/null +++ b/lightx2v/models/networks/motus/t5.py @@ -0,0 +1,3 @@ +from .wan.t5 import T5EncoderModel + +__all__ = ["T5EncoderModel"] diff --git a/lightx2v/models/networks/motus/und_expert.py b/lightx2v/models/networks/motus/und_expert.py new file mode 100644 index 000000000..b95a82321 --- /dev/null +++ b/lightx2v/models/networks/motus/und_expert.py @@ -0,0 +1,56 @@ +import re +from dataclasses import dataclass + +import torch +import torch.nn as nn + +from .primitives import WanLayerNorm, WanRMSNorm + + +@dataclass +class UndExpertConfig: + dim: int = 512 + ffn_dim: int = 2048 + num_layers: int = 30 + vlm_input_dim: int = 2048 + vlm_projector_type: str = "mlp3x_silu" + eps: float = 1e-5 + + +def build_condition_adapter(projector_type, in_features, out_features): + if projector_type == "linear": + return nn.Linear(in_features, out_features) + mlp_silu_match = re.match(r"^mlp(\d+)x_silu$", projector_type) + if mlp_silu_match: + mlp_depth = int(mlp_silu_match.group(1)) + modules = [nn.Linear(in_features, out_features)] + for _ in range(1, mlp_depth): + modules.append(nn.SiLU()) + modules.append(nn.Linear(out_features, out_features)) + return nn.Sequential(*modules) + raise ValueError(f"Unknown projector type: {projector_type}") + + +class UndExpertBlock(nn.Module): + def __init__(self, config: UndExpertConfig, wan_config: dict): + super().__init__() + self.norm1 = WanLayerNorm(config.dim, eps=config.eps) + self.norm2 = WanLayerNorm(config.dim, eps=config.eps) + self.wan_num_heads = wan_config["num_heads"] + self.wan_head_dim = wan_config["head_dim"] + self.wan_dim = wan_config["dim"] + self.wan_und_qkv = nn.Parameter(torch.randn(3, self.wan_num_heads, config.dim, self.wan_head_dim) / (config.dim * self.wan_head_dim) ** 0.5) + self.wan_und_o = nn.Linear(self.wan_dim, config.dim, bias=False) + self.wan_und_norm_q = WanRMSNorm(self.wan_dim, eps=config.eps) + self.wan_und_norm_k = WanRMSNorm(self.wan_dim, eps=config.eps) + self.ffn = nn.Sequential(nn.Linear(config.dim, config.ffn_dim), nn.GELU(approximate="tanh"), nn.Linear(config.ffn_dim, config.dim)) + + +class UndExpert(nn.Module): + def __init__(self, config: UndExpertConfig, wan_config: dict = None, vlm_config: dict = None): + super().__init__() + self.config = config + self.freq_dim = 256 + self.vlm_adapter = build_condition_adapter(config.vlm_projector_type, config.vlm_input_dim, config.dim) + block_cfg = wan_config or {"dim": 3072, "num_heads": 24, "head_dim": 128} + self.blocks = nn.ModuleList([UndExpertBlock(config, block_cfg) for _ in range(config.num_layers)]) diff --git a/lightx2v/models/networks/motus/wan/__init__.py b/lightx2v/models/networks/motus/wan/__init__.py new file mode 100644 index 000000000..9ce1f8e18 --- /dev/null +++ b/lightx2v/models/networks/motus/wan/__init__.py @@ -0,0 +1,13 @@ +from .attention import flash_attention +from .model import WanModel +from .t5 import T5EncoderModel +from .tokenizers import HuggingfaceTokenizer +from .vae2_2 import Wan2_2_VAE + +__all__ = [ + "WanModel", + "Wan2_2_VAE", + "T5EncoderModel", + "HuggingfaceTokenizer", + "flash_attention", +] diff --git a/lightx2v/models/networks/motus/wan/attention.py b/lightx2v/models/networks/motus/wan/attention.py new file mode 100644 index 000000000..cc0b2f681 --- /dev/null +++ b/lightx2v/models/networks/motus/wan/attention.py @@ -0,0 +1,98 @@ +import warnings + +import 
torch + +try: + import flash_attn_interface + + FLASH_ATTN_3_AVAILABLE = True +except ModuleNotFoundError: + FLASH_ATTN_3_AVAILABLE = False + +try: + import flash_attn + + FLASH_ATTN_2_AVAILABLE = True +except ModuleNotFoundError: + FLASH_ATTN_2_AVAILABLE = False + + +def flash_attention( + q, + k, + v, + q_lens=None, + k_lens=None, + dropout_p=0.0, + softmax_scale=None, + q_scale=None, + causal=False, + window_size=(-1, -1), + deterministic=False, + dtype=torch.bfloat16, + version=None, +): + half_dtypes = (torch.float16, torch.bfloat16) + assert dtype in half_dtypes + assert q.device.type == "cuda" and q.size(-1) <= 256 + + batch, q_len, kv_len, out_dtype = q.size(0), q.size(1), k.size(1), q.dtype + + def half(x): + return x if x.dtype in half_dtypes else x.to(dtype) + + if q_lens is None: + q = half(q.flatten(0, 1)) + q_lens = torch.tensor([q_len] * batch, dtype=torch.int32).to(device=q.device, non_blocking=True) + else: + q = half(torch.cat([u[:v] for u, v in zip(q, q_lens)])) + + if k_lens is None: + k = half(k.flatten(0, 1)) + v = half(v.flatten(0, 1)) + k_lens = torch.tensor([kv_len] * batch, dtype=torch.int32).to(device=k.device, non_blocking=True) + else: + k = half(torch.cat([u[:v] for u, v in zip(k, k_lens)])) + v = half(torch.cat([u[:v] for u, v in zip(v, k_lens)])) + + q = q.to(v.dtype) + k = k.to(v.dtype) + if q_scale is not None: + q = q * q_scale + + if version is not None and version == 3 and not FLASH_ATTN_3_AVAILABLE: + warnings.warn("Flash attention 3 is not available, using flash attention 2 instead.") + + if (version is None or version == 3) and FLASH_ATTN_3_AVAILABLE: + x = flash_attn_interface.flash_attn_varlen_func( + q=q, + k=k, + v=v, + cu_seqlens_q=torch.cat([q_lens.new_zeros([1]), q_lens]).cumsum(0, dtype=torch.int32).to(q.device, non_blocking=True), + cu_seqlens_k=torch.cat([k_lens.new_zeros([1]), k_lens]).cumsum(0, dtype=torch.int32).to(q.device, non_blocking=True), + seqused_q=None, + seqused_k=None, + max_seqlen_q=q_len, + max_seqlen_k=kv_len, + softmax_scale=softmax_scale, + causal=causal, + deterministic=deterministic, + )[0].unflatten(0, (batch, q_len)) + else: + assert FLASH_ATTN_2_AVAILABLE + x = flash_attn.flash_attn_varlen_func( + q=q, + k=k, + v=v, + cu_seqlens_q=torch.cat([q_lens.new_zeros([1]), q_lens]).cumsum(0, dtype=torch.int32).to(q.device, non_blocking=True), + cu_seqlens_k=torch.cat([k_lens.new_zeros([1]), k_lens]).cumsum(0, dtype=torch.int32).to(q.device, non_blocking=True), + max_seqlen_q=q_len, + max_seqlen_k=kv_len, + dropout_p=dropout_p, + softmax_scale=softmax_scale, + causal=causal, + window_size=window_size, + deterministic=deterministic, + ).unflatten(0, (batch, q_len)) + + return x.type(out_dtype) diff --git a/lightx2v/models/networks/motus/wan/model.py b/lightx2v/models/networks/motus/wan/model.py new file mode 100644 index 000000000..f6c92e42c --- /dev/null +++ b/lightx2v/models/networks/motus/wan/model.py @@ -0,0 +1,680 @@ +# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved. 
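+#
+# Illustrative usage sketch (an assumption-laden example, not part of the
+# upstream file): the config values below are made up so the shapes match the
+# running example in `unpatchify` ([48, 3, 24, 20] latents, 360 tokens), and
+# do not describe any shipped Motus checkpoint. The flash-attn kernels used
+# by the blocks require a CUDA device.
+#
+#   model = WanModel(model_type='ti2v', in_dim=48, out_dim=48, dim=3072,
+#                    ffn_dim=14336, num_heads=24, num_layers=30)
+#   x = [torch.randn(48, 3, 24, 20)]     # one latent clip, [C_in, F, H, W]
+#   t = torch.tensor([999])              # diffusion timestep, shape [B]
+#   ctx = [torch.randn(77, 4096)]        # text embeddings, [L, text_dim]
+#   out = model(x, t, ctx, seq_len=360)  # 360 = 3 * (24 // 2) * (20 // 2)
+#   # out: list of denoised latents, one [C_out, F, H, W] tensor per sample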
+import math + +import torch +import torch.nn as nn +from diffusers.configuration_utils import ConfigMixin, register_to_config +from diffusers.models.modeling_utils import ModelMixin + +from .attention import flash_attention + +__all__ = ['WanModel'] + + +def sinusoidal_embedding_1d(dim, position): + # preprocess + assert dim % 2 == 0 + half = dim // 2 + position = position.type(torch.float64) + + # calculation + sinusoid = torch.outer( + position, torch.pow(10000, -torch.arange(half).to(position).div(half))) + x = torch.cat([torch.cos(sinusoid), torch.sin(sinusoid)], dim=1) + return x + + +@torch.amp.autocast('cuda', enabled=False) +def rope_params(max_seq_len, dim, theta=10000): + assert dim % 2 == 0 + freqs = torch.outer( + torch.arange(max_seq_len), + 1.0 / torch.pow(theta, + torch.arange(0, dim, 2).to(torch.float64).div(dim))) + freqs = torch.polar(torch.ones_like(freqs), freqs) + return freqs + + +from functools import lru_cache + +@torch.amp.autocast('cuda', enabled=False) +def rope_apply(x: torch.Tensor, grid_sizes: torch.Tensor, freqs: torch.Tensor) -> torch.Tensor: + B, T, N, CC = x.shape + assert CC % 2 == 0, "last dim must be 2C (real, imag)" + C = CC // 2 + + c_f = C - 2 * (C // 3) + c_h = C // 3 + c_w = C // 3 + fpart, hpart, wpart = freqs.split([c_f, c_h, c_w], dim=1) + + x_c = torch.view_as_complex(x.to(torch.float64).reshape(B, T, N, -1, 2)).contiguous() + + y_c = x_c.clone() + + gsz = grid_sizes.to(torch.long) + uniq, inv = torch.unique(gsz, dim=0, return_inverse=True) + + @lru_cache(maxsize=256) + def _make_freq_grid(f: int, h: int, w: int): + fi = torch.cat([ + fpart[:f].view(f, 1, 1, -1).expand(f, h, w, -1), # [f,h,w,c_f] + hpart[:h].view(1, h, 1, -1).expand(f, h, w, -1), # [f,h,w,c_h] + wpart[:w].view(1, 1, w, -1).expand(f, h, w, -1), # [f,h,w,c_w] + ], dim=-1).reshape(f*h*w, 1, -1) # [seq_len,1,C] + return fi.contiguous() + + for g_idx, (f, h, w) in enumerate(uniq.tolist()): + idx = (inv == g_idx).nonzero(as_tuple=False).squeeze(-1) + if idx.numel() == 0: + continue + seq_len = f * h * w + + freq_grid = _make_freq_grid(f, h, w) # [seq_len,1,C] + + y_c[idx, :seq_len] = x_c[idx, :seq_len] * freq_grid + + y = torch.view_as_real(y_c).reshape(B, T, N, -1).float() + # assert rope_apply_original(x, grid_sizes, freqs).allclose(y, atol=1e-5) + return y + +@torch.amp.autocast('cuda', enabled=False) +def rope_apply_original(x, grid_sizes, freqs): + n, c = x.size(2), x.size(3) // 2 + + # split freqs + freqs = freqs.split([c - 2 * (c // 3), c // 3, c // 3], dim=1) + + # loop over samples + output = [] + for i, (f, h, w) in enumerate(grid_sizes.tolist()): + seq_len = f * h * w + + # precompute multipliers + x_i = torch.view_as_complex(x[i, :seq_len].to(torch.float64).reshape( + seq_len, n, -1, 2)) + freqs_i = torch.cat([ + freqs[0][:f].view(f, 1, 1, -1).expand(f, h, w, -1), + freqs[1][:h].view(1, h, 1, -1).expand(f, h, w, -1), + freqs[2][:w].view(1, 1, w, -1).expand(f, h, w, -1) + ], + dim=-1).reshape(seq_len, 1, -1) + + # apply rotary embedding + x_i = torch.view_as_real(x_i * freqs_i).flatten(2) + x_i = torch.cat([x_i, x[i, seq_len:]]) + + # append to collection + output.append(x_i) + return torch.stack(output).float() + + +class WanRMSNorm(nn.Module): + + def __init__(self, dim, eps=1e-5): + super().__init__() + self.dim = dim + self.eps = eps + self.weight = nn.Parameter(torch.ones(dim)) + + def forward(self, x): + r""" + Args: + x(Tensor): Shape [B, L, C] + """ + return self._norm(x.float()).type_as(x) * self.weight + + def _norm(self, x): + return x * 
torch.rsqrt(x.pow(2).mean(dim=-1, keepdim=True) + self.eps) + + +class WanLayerNorm(nn.LayerNorm): + + def __init__(self, dim, eps=1e-6, elementwise_affine=False): + super().__init__(dim, elementwise_affine=elementwise_affine, eps=eps) + + def forward(self, x): + r""" + Args: + x(Tensor): Shape [B, L, C] + """ + return super().forward(x.float()).type_as(x) + + +class WanSelfAttention(nn.Module): + + def __init__(self, + dim, + num_heads, + window_size=(-1, -1), + qk_norm=True, + eps=1e-6): + assert dim % num_heads == 0 + super().__init__() + self.dim = dim + self.num_heads = num_heads + self.head_dim = dim // num_heads + self.window_size = window_size + self.qk_norm = qk_norm + self.eps = eps + + # layers + self.q = nn.Linear(dim, dim) + self.k = nn.Linear(dim, dim) + self.v = nn.Linear(dim, dim) + self.o = nn.Linear(dim, dim) + self.norm_q = WanRMSNorm(dim, eps=eps) if qk_norm else nn.Identity() + self.norm_k = WanRMSNorm(dim, eps=eps) if qk_norm else nn.Identity() + + def forward(self, x, seq_lens, grid_sizes, freqs, + action_q: torch.Tensor = None, + action_k: torch.Tensor = None, + action_v: torch.Tensor = None, + und_q: torch.Tensor = None, + und_k: torch.Tensor = None, + und_v: torch.Tensor = None): + r""" + Args: + x(Tensor): Shape [B, L, num_heads, C / num_heads] + seq_lens(Tensor): Shape [B] + grid_sizes(Tensor): Shape [B, 3], the second dimension contains (F, H, W) + freqs(Tensor): Rope freqs, shape [1024, C / num_heads / 2] + action_q/k/v(Tensor, optional): Action expert Q/K/V for trimodal MoT + und_q/k/v(Tensor, optional): Understanding expert Q/K/V for trimodal MoT + """ + b, s, n, d = *x.shape[:2], self.num_heads, self.head_dim + + # query, key, value function + def qkv_fn(x): + q = self.norm_q(self.q(x)).view(b, s, n, d) + k = self.norm_k(self.k(x)).view(b, s, n, d) + v = self.v(x).view(b, s, n, d) + return q, k, v + + q, k, v = qkv_fn(x) + + # Trimodal MoT branch: WAN + Action + Understanding + if action_q is not None or und_q is not None: + L_x = q.size(1) + + # Apply RoPE only to video tokens (q, k) + q_video_rope = rope_apply(q, grid_sizes, freqs) + k_video_rope = rope_apply(k, grid_sizes, freqs) + + # Prepare parts for concatenation + q_parts = [q_video_rope] + k_parts = [k_video_rope] + v_parts = [v] + + # Add action tokens if provided + if action_q is not None: + q_parts.append(action_q) + k_parts.append(action_k) + v_parts.append(action_v) + L_action = action_q.size(1) + else: + L_action = 0 + + # Add understanding tokens if provided + if und_q is not None: + q_parts.append(und_q) + k_parts.append(und_k) + v_parts.append(und_v) + L_und = und_q.size(1) + else: + L_und = 0 + + # Concatenate all modalities + q_cat = torch.cat(q_parts, dim=1) + k_cat = torch.cat(k_parts, dim=1) + v_cat = torch.cat(v_parts, dim=1) + + attn_out = flash_attention( + q=q_cat, + k=k_cat, + v=v_cat, + k_lens=seq_lens, + window_size=self.window_size) + + # Split outputs back to respective modalities + x_out = attn_out[:, :L_x, :, :] + outputs = [x_out] + + start_idx = L_x + if action_q is not None: + action_out = attn_out[:, start_idx:start_idx+L_action, :, :] + outputs.append(action_out) + start_idx += L_action + else: + outputs.append(None) + + if und_q is not None: + und_out = attn_out[:, start_idx:start_idx+L_und, :, :] + outputs.append(und_out) + else: + outputs.append(None) + + # Project WAN branch; other branches returned in head shape for external projection + x_out = x_out.flatten(2) + x_out = self.o(x_out) + outputs[0] = x_out + + return tuple(outputs) + + # Standard branch (no MoT) + 
x = flash_attention( + q=rope_apply(q, grid_sizes, freqs), + k=rope_apply(k, grid_sizes, freqs), + v=v, + k_lens=seq_lens, + window_size=self.window_size) + + # output + x = x.flatten(2) + x = self.o(x) + return x + + +class WanCrossAttention(WanSelfAttention): + + def forward(self, x, context, context_lens): + r""" + Args: + x(Tensor): Shape [B, L1, C] + context(Tensor): Shape [B, L2, C] + context_lens(Tensor): Shape [B] + """ + b, n, d = x.size(0), self.num_heads, self.head_dim + + # compute query, key, value + q = self.norm_q(self.q(x)).view(b, -1, n, d) + k = self.norm_k(self.k(context)).view(b, -1, n, d) + v = self.v(context).view(b, -1, n, d) + + # compute attention + x = flash_attention(q, k, v, k_lens=context_lens) + + # output + x = x.flatten(2) + x = self.o(x) + return x + + +class WanAttentionBlock(nn.Module): + + def __init__(self, + dim, + ffn_dim, + num_heads, + window_size=(-1, -1), + qk_norm=True, + cross_attn_norm=False, + eps=1e-6): + super().__init__() + self.dim = dim + self.ffn_dim = ffn_dim + self.num_heads = num_heads + self.window_size = window_size + self.qk_norm = qk_norm + self.cross_attn_norm = cross_attn_norm + self.eps = eps + + # layers + self.norm1 = WanLayerNorm(dim, eps) + self.self_attn = WanSelfAttention(dim, num_heads, window_size, qk_norm, + eps) + + self.norm3 = WanLayerNorm( + dim, eps, + elementwise_affine=True) if cross_attn_norm else nn.Identity() + self.cross_attn = WanCrossAttention(dim, num_heads, (-1, -1), qk_norm, + eps) + self.norm2 = WanLayerNorm(dim, eps) + self.ffn = nn.Sequential( + nn.Linear(dim, ffn_dim), nn.GELU(approximate='tanh'), + nn.Linear(ffn_dim, dim)) + + # modulation + self.modulation = nn.Parameter(torch.randn(1, 6, dim) / dim**0.5) + + def forward( + self, + x, + e, + seq_lens, + grid_sizes, + freqs, + context, + context_lens, + ): + r""" + Args: + x(Tensor): Shape [B, L, C] + e(Tensor): Shape [B, L1, 6, C] + seq_lens(Tensor): Shape [B], length of each sequence in batch + grid_sizes(Tensor): Shape [B, 3], the second dimension contains (F, H, W) + freqs(Tensor): Rope freqs, shape [1024, C / num_heads / 2] + """ + assert e.dtype == torch.float32 + with torch.amp.autocast('cuda', dtype=torch.float32): + e = (self.modulation.unsqueeze(0) + e).chunk(6, dim=2) + assert e[0].dtype == torch.float32 + + # self-attention + y = self.self_attn( + self.norm1(x).float() * (1 + e[1].squeeze(2)) + e[0].squeeze(2), + seq_lens, grid_sizes, freqs) + with torch.amp.autocast('cuda', dtype=torch.float32): + x = x + y * e[2].squeeze(2) + + # cross-attention & ffn function + def cross_attn_ffn(x, context, context_lens, e): + x = x + self.cross_attn(self.norm3(x), context, context_lens) + y = self.ffn( + self.norm2(x).float() * (1 + e[4].squeeze(2)) + e[3].squeeze(2)) + with torch.amp.autocast('cuda', dtype=torch.float32): + x = x + y * e[5].squeeze(2) + return x + + x = cross_attn_ffn(x, context, context_lens, e) + return x + + +class Head(nn.Module): + + def __init__(self, dim, out_dim, patch_size, eps=1e-6): + super().__init__() + self.dim = dim + self.out_dim = out_dim + self.patch_size = patch_size + self.eps = eps + + # layers + out_dim = math.prod(patch_size) * out_dim + self.norm = WanLayerNorm(dim, eps) + self.head = nn.Linear(dim, out_dim) + + # modulation + self.modulation = nn.Parameter(torch.randn(1, 2, dim) / dim**0.5) + + def forward(self, x, e): + r""" + Args: + x(Tensor): Shape [B, L1, C] + e(Tensor): Shape [B, L1, C] + """ + assert e.dtype == torch.float32 + with torch.amp.autocast('cuda', dtype=torch.float32): + e = 
(self.modulation.unsqueeze(0) + e.unsqueeze(2)).chunk(2, dim=2) + x = ( + self.head( + self.norm(x) * (1 + e[1].squeeze(2)) + e[0].squeeze(2))) + return x + + +class WanModel(ModelMixin, ConfigMixin): + r""" + Wan diffusion backbone supporting both text-to-video and image-to-video. + """ + + ignore_for_config = [ + 'patch_size', 'cross_attn_norm', 'qk_norm', 'text_dim', 'window_size' + ] + _no_split_modules = ['WanAttentionBlock'] + + @register_to_config + def __init__(self, + model_type='t2v', + patch_size=(1, 2, 2), + text_len=512, + in_dim=16, + dim=2048, + ffn_dim=8192, + freq_dim=256, + text_dim=4096, + out_dim=16, + num_heads=16, + num_layers=32, + window_size=(-1, -1), + qk_norm=True, + cross_attn_norm=True, + eps=1e-6): + r""" + Initialize the diffusion model backbone. + + Args: + model_type (`str`, *optional*, defaults to 't2v'): + Model variant - 't2v' (text-to-video) or 'i2v' (image-to-video) + patch_size (`tuple`, *optional*, defaults to (1, 2, 2)): + 3D patch dimensions for video embedding (t_patch, h_patch, w_patch) + text_len (`int`, *optional*, defaults to 512): + Fixed length for text embeddings + in_dim (`int`, *optional*, defaults to 16): + Input video channels (C_in) + dim (`int`, *optional*, defaults to 2048): + Hidden dimension of the transformer + ffn_dim (`int`, *optional*, defaults to 8192): + Intermediate dimension in feed-forward network + freq_dim (`int`, *optional*, defaults to 256): + Dimension for sinusoidal time embeddings + text_dim (`int`, *optional*, defaults to 4096): + Input dimension for text embeddings + out_dim (`int`, *optional*, defaults to 16): + Output video channels (C_out) + num_heads (`int`, *optional*, defaults to 16): + Number of attention heads + num_layers (`int`, *optional*, defaults to 32): + Number of transformer blocks + window_size (`tuple`, *optional*, defaults to (-1, -1)): + Window size for local attention (-1 indicates global attention) + qk_norm (`bool`, *optional*, defaults to True): + Enable query/key normalization + cross_attn_norm (`bool`, *optional*, defaults to False): + Enable cross-attention normalization + eps (`float`, *optional*, defaults to 1e-6): + Epsilon value for normalization layers + """ + + super().__init__() + + assert model_type in ['t2v', 'i2v', 'ti2v'] + self.model_type = model_type + + self.patch_size = patch_size + self.text_len = text_len + self.in_dim = in_dim + self.dim = dim + self.ffn_dim = ffn_dim + self.freq_dim = freq_dim + self.text_dim = text_dim + self.out_dim = out_dim + self.num_heads = num_heads + self.num_layers = num_layers + self.window_size = window_size + self.qk_norm = qk_norm + self.cross_attn_norm = cross_attn_norm + self.eps = eps + + # embeddings + self.patch_embedding = nn.Conv3d( + in_dim, dim, kernel_size=patch_size, stride=patch_size) + self.text_embedding = nn.Sequential( + nn.Linear(text_dim, dim), nn.GELU(approximate='tanh'), + nn.Linear(dim, dim)) + + self.time_embedding = nn.Sequential( + nn.Linear(freq_dim, dim), nn.SiLU(), nn.Linear(dim, dim)) + self.time_projection = nn.Sequential(nn.SiLU(), nn.Linear(dim, dim * 6)) + + # blocks + self.blocks = nn.ModuleList([ + WanAttentionBlock(dim, ffn_dim, num_heads, window_size, qk_norm, + cross_attn_norm, eps) for _ in range(num_layers) + ]) + + # head + self.head = Head(dim, out_dim, patch_size, eps) + + # buffers (don't use register_buffer otherwise dtype will be changed in to()) + assert (dim % num_heads) == 0 and (dim // num_heads) % 2 == 0 + d = dim // num_heads + self.freqs = torch.cat([ + rope_params(1024, d - 4 * (d // 
6)), + rope_params(1024, 2 * (d // 6)), + rope_params(1024, 2 * (d // 6)) + ], + dim=1) + + # initialize weights + self.init_weights() + + def forward( + self, + x, + t, + context, + seq_len, + y=None, + ): + r""" + Forward pass through the diffusion model + + Args: + x (List[Tensor]): + List of input video tensors, each with shape [C_in, F, H, W] + t (Tensor): + Diffusion timesteps tensor of shape [B] + context (List[Tensor]): + List of text embeddings each with shape [L, C] + seq_len (`int`): + Maximum sequence length for positional encoding + y (List[Tensor], *optional*): + Conditional video inputs for image-to-video mode, same shape as x + + Returns: + List[Tensor]: + List of denoised video tensors with original input shapes [C_out, F, H / 8, W / 8] + """ + if self.model_type == 'i2v': + assert y is not None + # params + device = self.patch_embedding.weight.device + if self.freqs.device != device: + self.freqs = self.freqs.to(device) + + if y is not None: + x = [torch.cat([u, v], dim=0) for u, v in zip(x, y)] + + # embeddings + x = [self.patch_embedding(u.unsqueeze(0)) for u in x] + grid_sizes = torch.stack( + [torch.tensor(u.shape[2:], dtype=torch.long) for u in x]) + x = [u.flatten(2).transpose(1, 2) for u in x] + seq_lens = torch.tensor([u.size(1) for u in x], dtype=torch.long) + assert seq_lens.max() <= seq_len + x = torch.cat([ + torch.cat([u, u.new_zeros(1, seq_len - u.size(1), u.size(2))], + dim=1) for u in x + ]) + + # time embeddings + if t.dim() == 1: + t = t.expand(t.size(0), seq_len) + with torch.amp.autocast('cuda', dtype=torch.float32): + bt = t.size(0) + t = t.flatten() + e = self.time_embedding( + sinusoidal_embedding_1d(self.freq_dim, + t).unflatten(0, (bt, seq_len)).float()) + e0 = self.time_projection(e).unflatten(2, (6, self.dim)) + assert e.dtype == torch.float32 and e0.dtype == torch.float32 + + # context + context_lens = None + context = self.text_embedding( + torch.stack([ + torch.cat( + [u, u.new_zeros(self.text_len - u.size(0), u.size(1))]) + for u in context + ])) + + # arguments + kwargs = dict( + e=e0, + seq_lens=seq_lens, + grid_sizes=grid_sizes, + freqs=self.freqs, + context=context, + context_lens=context_lens) + + for block in self.blocks: + x = block(x, **kwargs) + + # head + x = self.head(x, e) + + # unpatchify + x = self.unpatchify(x, grid_sizes) + return [u.float() for u in x] + + def unpatchify(self, x, grid_sizes): + r""" + Reconstruct video tensors from patch embeddings. 
+
+        Args:
+            x (List[Tensor]):
+                List of patchified features, each with shape
+                [L, C_out * prod(patch_size)], e.g. [360, 48 * 4] = [360, 192]
+                for patch_size (1, 2, 2)
+            grid_sizes (Tensor):
+                Original spatial-temporal grid dimensions before patching,
+                shape [B, 3] (3 dimensions correspond to F_patches, H_patches, W_patches)
+
+        Returns:
+            List[Tensor]:
+                Reconstructed video tensors with shape [C_out, F, H / 8, W / 8]
+        """
+        # running example: x.shape = [1, 360, 192]
+        c = self.out_dim
+        out = []
+        # e.g. grid_sizes.tolist() = [[3, 12, 10]]
+        for u, v in zip(x, grid_sizes.tolist()):
+            # Drop any padded tokens and reshape into the patch grid: the
+            # sequence may have been padded for alignment, so only the first
+            # F_patches * H_patches * W_patches patches are kept.
+            # Result: [F_patches, H_patches, W_patches, pF, pH, pW, C_out],
+            # e.g. [3, 12, 10, 1, 2, 2, 48]
+            u = u[:math.prod(v)].view(*v, *self.patch_size, c)
+            # Interleave the patch grid with intra-patch positions: f h w are
+            # patch-grid coordinates, p q r are intra-patch coordinates, c is
+            # the channel. After the permute:
+            # [C_out, F_patches, pF (frames per patch), H_patches,
+            #  pH (pixels per patch along height), W_patches, pW],
+            # so the next reshape can merge F_patches with pF into the full
+            # frame count F, H_patches with pH into the full height, and
+            # W_patches with pW into the full width.
+            u = torch.einsum('fhwpqrc->cfphqwr', u)
+            # Merge into [C_out, F_patches * pF, H_patches * pH, W_patches * pW],
+            # i.e. [C_out, F, H, W]
+            u = u.reshape(c, *[i * j for i, j in zip(v, self.patch_size)])  # e.g. [48, 3, 24, 20]
+            out.append(u)  # one entry per sample in the batch
+        return out
+
+    def init_weights(self):
+        r"""
+        Initialize model parameters using Xavier initialization.
+        """
+
+        # basic init
+        for m in self.modules():
+            if isinstance(m, nn.Linear):
+                nn.init.xavier_uniform_(m.weight)
+                if m.bias is not None:
+                    nn.init.zeros_(m.bias)
+
+        # init embeddings
+        nn.init.xavier_uniform_(self.patch_embedding.weight.flatten(1))
+        for m in self.text_embedding.modules():
+            if isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, std=.02)
+        for m in self.time_embedding.modules():
+            if isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, std=.02)
+
+        # init output layer
+        nn.init.zeros_(self.head.head.weight)
diff --git a/lightx2v/models/networks/motus/wan/t5.py b/lightx2v/models/networks/motus/wan/t5.py
new file mode 100644
index 000000000..c841b044a
--- /dev/null
+++ b/lightx2v/models/networks/motus/wan/t5.py
@@ -0,0 +1,513 @@
+# Modified from transformers.models.t5.modeling_t5
+# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
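+#
+# Illustrative usage sketch (not part of the upstream file; the checkpoint
+# filename and tokenizer path below are placeholders, not verified Motus
+# asset names):
+#
+#   enc = T5EncoderModel(text_len=512,
+#                        checkpoint_path='<umt5-xxl-encoder>.pth',
+#                        tokenizer_path='<umt5-xxl tokenizer dir>')
+#   ctx = enc(['a robot arm picks up a cup'], device='cuda')
+#   # ctx: list with one [num_tokens, 4096] tensor per prompt, trimmed to
+#   # the true (unpadded) token count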
+import logging +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .tokenizers import HuggingfaceTokenizer + +__all__ = [ + 'T5Model', + 'T5Encoder', + 'T5Decoder', + 'T5EncoderModel', +] + + +def fp16_clamp(x): + if x.dtype == torch.float16 and torch.isinf(x).any(): + clamp = torch.finfo(x.dtype).max - 1000 + x = torch.clamp(x, min=-clamp, max=clamp) + return x + + +def init_weights(m): + if isinstance(m, T5LayerNorm): + nn.init.ones_(m.weight) + elif isinstance(m, T5Model): + nn.init.normal_(m.token_embedding.weight, std=1.0) + elif isinstance(m, T5FeedForward): + nn.init.normal_(m.gate[0].weight, std=m.dim**-0.5) + nn.init.normal_(m.fc1.weight, std=m.dim**-0.5) + nn.init.normal_(m.fc2.weight, std=m.dim_ffn**-0.5) + elif isinstance(m, T5Attention): + nn.init.normal_(m.q.weight, std=(m.dim * m.dim_attn)**-0.5) + nn.init.normal_(m.k.weight, std=m.dim**-0.5) + nn.init.normal_(m.v.weight, std=m.dim**-0.5) + nn.init.normal_(m.o.weight, std=(m.num_heads * m.dim_attn)**-0.5) + elif isinstance(m, T5RelativeEmbedding): + nn.init.normal_( + m.embedding.weight, std=(2 * m.num_buckets * m.num_heads)**-0.5) + + +class GELU(nn.Module): + + def forward(self, x): + return 0.5 * x * (1.0 + torch.tanh( + math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))) + + +class T5LayerNorm(nn.Module): + + def __init__(self, dim, eps=1e-6): + super(T5LayerNorm, self).__init__() + self.dim = dim + self.eps = eps + self.weight = nn.Parameter(torch.ones(dim)) + + def forward(self, x): + x = x * torch.rsqrt(x.float().pow(2).mean(dim=-1, keepdim=True) + + self.eps) + if self.weight.dtype in [torch.float16, torch.bfloat16]: + x = x.type_as(self.weight) + return self.weight * x + + +class T5Attention(nn.Module): + + def __init__(self, dim, dim_attn, num_heads, dropout=0.1): + assert dim_attn % num_heads == 0 + super(T5Attention, self).__init__() + self.dim = dim + self.dim_attn = dim_attn + self.num_heads = num_heads + self.head_dim = dim_attn // num_heads + + # layers + self.q = nn.Linear(dim, dim_attn, bias=False) + self.k = nn.Linear(dim, dim_attn, bias=False) + self.v = nn.Linear(dim, dim_attn, bias=False) + self.o = nn.Linear(dim_attn, dim, bias=False) + self.dropout = nn.Dropout(dropout) + + def forward(self, x, context=None, mask=None, pos_bias=None): + """ + x: [B, L1, C]. + context: [B, L2, C] or None. + mask: [B, L2] or [B, L1, L2] or None. 
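+        pos_bias: [1, N, L1, L2] relative position bias, or None.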
+ """ + # check inputs + context = x if context is None else context + b, n, c = x.size(0), self.num_heads, self.head_dim + + # compute query, key, value + q = self.q(x).view(b, -1, n, c) + k = self.k(context).view(b, -1, n, c) + v = self.v(context).view(b, -1, n, c) + + # attention bias + attn_bias = x.new_zeros(b, n, q.size(1), k.size(1)) + if pos_bias is not None: + attn_bias += pos_bias + if mask is not None: + assert mask.ndim in [2, 3] + mask = mask.view(b, 1, 1, + -1) if mask.ndim == 2 else mask.unsqueeze(1) + attn_bias.masked_fill_(mask == 0, torch.finfo(x.dtype).min) + + # compute attention (T5 does not use scaling) + attn = torch.einsum('binc,bjnc->bnij', q, k) + attn_bias + attn = F.softmax(attn.float(), dim=-1).type_as(attn) + x = torch.einsum('bnij,bjnc->binc', attn, v) + + # output + x = x.reshape(b, -1, n * c) + x = self.o(x) + x = self.dropout(x) + return x + + +class T5FeedForward(nn.Module): + + def __init__(self, dim, dim_ffn, dropout=0.1): + super(T5FeedForward, self).__init__() + self.dim = dim + self.dim_ffn = dim_ffn + + # layers + self.gate = nn.Sequential(nn.Linear(dim, dim_ffn, bias=False), GELU()) + self.fc1 = nn.Linear(dim, dim_ffn, bias=False) + self.fc2 = nn.Linear(dim_ffn, dim, bias=False) + self.dropout = nn.Dropout(dropout) + + def forward(self, x): + x = self.fc1(x) * self.gate(x) + x = self.dropout(x) + x = self.fc2(x) + x = self.dropout(x) + return x + + +class T5SelfAttention(nn.Module): + + def __init__(self, + dim, + dim_attn, + dim_ffn, + num_heads, + num_buckets, + shared_pos=True, + dropout=0.1): + super(T5SelfAttention, self).__init__() + self.dim = dim + self.dim_attn = dim_attn + self.dim_ffn = dim_ffn + self.num_heads = num_heads + self.num_buckets = num_buckets + self.shared_pos = shared_pos + + # layers + self.norm1 = T5LayerNorm(dim) + self.attn = T5Attention(dim, dim_attn, num_heads, dropout) + self.norm2 = T5LayerNorm(dim) + self.ffn = T5FeedForward(dim, dim_ffn, dropout) + self.pos_embedding = None if shared_pos else T5RelativeEmbedding( + num_buckets, num_heads, bidirectional=True) + + def forward(self, x, mask=None, pos_bias=None): + e = pos_bias if self.shared_pos else self.pos_embedding( + x.size(1), x.size(1)) + x = fp16_clamp(x + self.attn(self.norm1(x), mask=mask, pos_bias=e)) + x = fp16_clamp(x + self.ffn(self.norm2(x))) + return x + + +class T5CrossAttention(nn.Module): + + def __init__(self, + dim, + dim_attn, + dim_ffn, + num_heads, + num_buckets, + shared_pos=True, + dropout=0.1): + super(T5CrossAttention, self).__init__() + self.dim = dim + self.dim_attn = dim_attn + self.dim_ffn = dim_ffn + self.num_heads = num_heads + self.num_buckets = num_buckets + self.shared_pos = shared_pos + + # layers + self.norm1 = T5LayerNorm(dim) + self.self_attn = T5Attention(dim, dim_attn, num_heads, dropout) + self.norm2 = T5LayerNorm(dim) + self.cross_attn = T5Attention(dim, dim_attn, num_heads, dropout) + self.norm3 = T5LayerNorm(dim) + self.ffn = T5FeedForward(dim, dim_ffn, dropout) + self.pos_embedding = None if shared_pos else T5RelativeEmbedding( + num_buckets, num_heads, bidirectional=False) + + def forward(self, + x, + mask=None, + encoder_states=None, + encoder_mask=None, + pos_bias=None): + e = pos_bias if self.shared_pos else self.pos_embedding( + x.size(1), x.size(1)) + x = fp16_clamp(x + self.self_attn(self.norm1(x), mask=mask, pos_bias=e)) + x = fp16_clamp(x + self.cross_attn( + self.norm2(x), context=encoder_states, mask=encoder_mask)) + x = fp16_clamp(x + self.ffn(self.norm3(x))) + return x + + +class 
T5RelativeEmbedding(nn.Module): + + def __init__(self, num_buckets, num_heads, bidirectional, max_dist=128): + super(T5RelativeEmbedding, self).__init__() + self.num_buckets = num_buckets + self.num_heads = num_heads + self.bidirectional = bidirectional + self.max_dist = max_dist + + # layers + self.embedding = nn.Embedding(num_buckets, num_heads) + + def forward(self, lq, lk): + device = self.embedding.weight.device + # rel_pos = torch.arange(lk).unsqueeze(0).to(device) - \ + # torch.arange(lq).unsqueeze(1).to(device) + rel_pos = torch.arange(lk, device=device).unsqueeze(0) - \ + torch.arange(lq, device=device).unsqueeze(1) + rel_pos = self._relative_position_bucket(rel_pos) + rel_pos_embeds = self.embedding(rel_pos) + rel_pos_embeds = rel_pos_embeds.permute(2, 0, 1).unsqueeze( + 0) # [1, N, Lq, Lk] + return rel_pos_embeds.contiguous() + + def _relative_position_bucket(self, rel_pos): + # preprocess + if self.bidirectional: + num_buckets = self.num_buckets // 2 + rel_buckets = (rel_pos > 0).long() * num_buckets + rel_pos = torch.abs(rel_pos) + else: + num_buckets = self.num_buckets + rel_buckets = 0 + rel_pos = -torch.min(rel_pos, torch.zeros_like(rel_pos)) + + # embeddings for small and large positions + max_exact = num_buckets // 2 + rel_pos_large = max_exact + (torch.log(rel_pos.float() / max_exact) / + math.log(self.max_dist / max_exact) * + (num_buckets - max_exact)).long() + rel_pos_large = torch.min( + rel_pos_large, torch.full_like(rel_pos_large, num_buckets - 1)) + rel_buckets += torch.where(rel_pos < max_exact, rel_pos, rel_pos_large) + return rel_buckets + + +class T5Encoder(nn.Module): + + def __init__(self, + vocab, + dim, + dim_attn, + dim_ffn, + num_heads, + num_layers, + num_buckets, + shared_pos=True, + dropout=0.1): + super(T5Encoder, self).__init__() + self.dim = dim + self.dim_attn = dim_attn + self.dim_ffn = dim_ffn + self.num_heads = num_heads + self.num_layers = num_layers + self.num_buckets = num_buckets + self.shared_pos = shared_pos + + # layers + self.token_embedding = vocab if isinstance(vocab, nn.Embedding) \ + else nn.Embedding(vocab, dim) + self.pos_embedding = T5RelativeEmbedding( + num_buckets, num_heads, bidirectional=True) if shared_pos else None + self.dropout = nn.Dropout(dropout) + self.blocks = nn.ModuleList([ + T5SelfAttention(dim, dim_attn, dim_ffn, num_heads, num_buckets, + shared_pos, dropout) for _ in range(num_layers) + ]) + self.norm = T5LayerNorm(dim) + + # initialize weights + self.apply(init_weights) + + def forward(self, ids, mask=None): + x = self.token_embedding(ids) + x = self.dropout(x) + e = self.pos_embedding(x.size(1), + x.size(1)) if self.shared_pos else None + for block in self.blocks: + x = block(x, mask, pos_bias=e) + x = self.norm(x) + x = self.dropout(x) + return x + + +class T5Decoder(nn.Module): + + def __init__(self, + vocab, + dim, + dim_attn, + dim_ffn, + num_heads, + num_layers, + num_buckets, + shared_pos=True, + dropout=0.1): + super(T5Decoder, self).__init__() + self.dim = dim + self.dim_attn = dim_attn + self.dim_ffn = dim_ffn + self.num_heads = num_heads + self.num_layers = num_layers + self.num_buckets = num_buckets + self.shared_pos = shared_pos + + # layers + self.token_embedding = vocab if isinstance(vocab, nn.Embedding) \ + else nn.Embedding(vocab, dim) + self.pos_embedding = T5RelativeEmbedding( + num_buckets, num_heads, bidirectional=False) if shared_pos else None + self.dropout = nn.Dropout(dropout) + self.blocks = nn.ModuleList([ + T5CrossAttention(dim, dim_attn, dim_ffn, num_heads, num_buckets, + 
shared_pos, dropout) for _ in range(num_layers) + ]) + self.norm = T5LayerNorm(dim) + + # initialize weights + self.apply(init_weights) + + def forward(self, ids, mask=None, encoder_states=None, encoder_mask=None): + b, s = ids.size() + + # causal mask + if mask is None: + mask = torch.tril(torch.ones(1, s, s).to(ids.device)) + elif mask.ndim == 2: + mask = torch.tril(mask.unsqueeze(1).expand(-1, s, -1)) + + # layers + x = self.token_embedding(ids) + x = self.dropout(x) + e = self.pos_embedding(x.size(1), + x.size(1)) if self.shared_pos else None + for block in self.blocks: + x = block(x, mask, encoder_states, encoder_mask, pos_bias=e) + x = self.norm(x) + x = self.dropout(x) + return x + + +class T5Model(nn.Module): + + def __init__(self, + vocab_size, + dim, + dim_attn, + dim_ffn, + num_heads, + encoder_layers, + decoder_layers, + num_buckets, + shared_pos=True, + dropout=0.1): + super(T5Model, self).__init__() + self.vocab_size = vocab_size + self.dim = dim + self.dim_attn = dim_attn + self.dim_ffn = dim_ffn + self.num_heads = num_heads + self.encoder_layers = encoder_layers + self.decoder_layers = decoder_layers + self.num_buckets = num_buckets + + # layers + self.token_embedding = nn.Embedding(vocab_size, dim) + self.encoder = T5Encoder(self.token_embedding, dim, dim_attn, dim_ffn, + num_heads, encoder_layers, num_buckets, + shared_pos, dropout) + self.decoder = T5Decoder(self.token_embedding, dim, dim_attn, dim_ffn, + num_heads, decoder_layers, num_buckets, + shared_pos, dropout) + self.head = nn.Linear(dim, vocab_size, bias=False) + + # initialize weights + self.apply(init_weights) + + def forward(self, encoder_ids, encoder_mask, decoder_ids, decoder_mask): + x = self.encoder(encoder_ids, encoder_mask) + x = self.decoder(decoder_ids, decoder_mask, x, encoder_mask) + x = self.head(x) + return x + + +def _t5(name, + encoder_only=False, + decoder_only=False, + return_tokenizer=False, + tokenizer_kwargs={}, + dtype=torch.float32, + device='cpu', + **kwargs): + # sanity check + assert not (encoder_only and decoder_only) + + # params + if encoder_only: + model_cls = T5Encoder + kwargs['vocab'] = kwargs.pop('vocab_size') + kwargs['num_layers'] = kwargs.pop('encoder_layers') + _ = kwargs.pop('decoder_layers') + elif decoder_only: + model_cls = T5Decoder + kwargs['vocab'] = kwargs.pop('vocab_size') + kwargs['num_layers'] = kwargs.pop('decoder_layers') + _ = kwargs.pop('encoder_layers') + else: + model_cls = T5Model + + # init model + with torch.device(device): + model = model_cls(**kwargs) + + # set device + model = model.to(dtype=dtype, device=device) + + # init tokenizer + if return_tokenizer: + from .tokenizers import HuggingfaceTokenizer + tokenizer = HuggingfaceTokenizer(f'google/{name}', **tokenizer_kwargs) + return model, tokenizer + else: + return model + + +def umt5_xxl(**kwargs): + cfg = dict( + vocab_size=256384, + dim=4096, + dim_attn=4096, + dim_ffn=10240, + num_heads=64, + encoder_layers=24, + decoder_layers=24, + num_buckets=32, + shared_pos=False, + dropout=0.1) + cfg.update(**kwargs) + return _t5('umt5-xxl', **cfg) + + +class T5EncoderModel: + + def __init__( + self, + text_len, + dtype=torch.bfloat16, + device=torch.cuda.current_device(), + checkpoint_path=None, + tokenizer_path=None, + shard_fn=None, + ): + self.text_len = text_len + self.dtype = dtype + self.device = device + self.checkpoint_path = checkpoint_path + self.tokenizer_path = tokenizer_path + + # init model + model = umt5_xxl( + encoder_only=True, + return_tokenizer=False, + dtype=dtype, + 
device=device).eval().requires_grad_(False)
+        logging.info(f'loading {checkpoint_path}')
+        model.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))
+        self.model = model
+        if shard_fn is not None:
+            self.model = shard_fn(self.model, sync_module_states=False)
+        else:
+            self.model.to(self.device)
+        # init tokenizer
+        self.tokenizer = HuggingfaceTokenizer(
+            name=tokenizer_path, seq_len=text_len, clean='whitespace')
+
+    def __call__(self, texts, device):
+        ids, mask = self.tokenizer(
+            texts, return_mask=True, add_special_tokens=True)
+        ids = ids.to(device)
+        mask = mask.to(device)
+        seq_lens = mask.gt(0).sum(dim=1).long()
+        context = self.model(ids, mask)
+        return [u[:v] for u, v in zip(context, seq_lens)]
diff --git a/lightx2v/models/networks/motus/wan/tokenizers.py b/lightx2v/models/networks/motus/wan/tokenizers.py
new file mode 100644
index 000000000..e9e167b05
--- /dev/null
+++ b/lightx2v/models/networks/motus/wan/tokenizers.py
@@ -0,0 +1,62 @@
+import html
+import string
+
+import ftfy
+import regex as re
+from transformers import AutoTokenizer
+
+
+def basic_clean(text):
+    text = ftfy.fix_text(text)
+    text = html.unescape(html.unescape(text))
+    return text.strip()
+
+
+def whitespace_clean(text):
+    text = re.sub(r"\s+", " ", text)
+    return text.strip()
+
+
+def canonicalize(text, keep_punctuation_exact_string=None):
+    text = text.replace("_", " ")
+    if keep_punctuation_exact_string:
+        text = keep_punctuation_exact_string.join(
+            part.translate(str.maketrans("", "", string.punctuation)) for part in text.split(keep_punctuation_exact_string)
+        )
+    else:
+        text = text.translate(str.maketrans("", "", string.punctuation))
+    text = text.lower()
+    text = re.sub(r"\s+", " ", text)
+    return text.strip()
+
+
+class HuggingfaceTokenizer:
+    def __init__(self, name, seq_len=None, clean=None, **kwargs):
+        assert clean in (None, "whitespace", "lower", "canonicalize")
+        self.name = name
+        self.seq_len = seq_len
+        self.clean = clean
+        self.tokenizer = AutoTokenizer.from_pretrained(name, **kwargs)
+        self.vocab_size = self.tokenizer.vocab_size
+
+    def __call__(self, sequence, **kwargs):
+        return_mask = kwargs.pop("return_mask", False)
+        local_kwargs = {"return_tensors": "pt"}
+        if self.seq_len is not None:
+            local_kwargs.update({"padding": "max_length", "truncation": True, "max_length": self.seq_len})
+        local_kwargs.update(**kwargs)
+        if isinstance(sequence, str):
+            sequence = [sequence]
+        if self.clean:
+            sequence = [self._clean(item) for item in sequence]
+        ids = self.tokenizer(sequence, **local_kwargs)
+        return (ids.input_ids, ids.attention_mask) if return_mask else ids.input_ids
+
+    def _clean(self, text):
+        if self.clean == "whitespace":
+            text = whitespace_clean(basic_clean(text))
+        elif self.clean == "lower":
+            text = whitespace_clean(basic_clean(text)).lower()
+        elif self.clean == "canonicalize":
+            text = canonicalize(basic_clean(text))
+        return text
diff --git a/lightx2v/models/networks/motus/wan/vae2_2.py b/lightx2v/models/networks/motus/wan/vae2_2.py
new file mode 100644
index 000000000..b705ef66f
--- /dev/null
+++ b/lightx2v/models/networks/motus/wan/vae2_2.py
@@ -0,0 +1,1041 @@
+# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
+import logging
+
+import torch
+import torch.cuda.amp as amp
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange
+
+__all__ = [
+    "Wan2_2_VAE",
+]
+
+CACHE_T = 2
+
+
+class CausalConv3d(nn.Conv3d):
+    """
+    Causal 3d convolution.
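+
+    All temporal padding (2 * padding[0]) is applied to the front of the time
+    axis, so each output frame depends only on current and earlier frames;
+    `cache_x` lets chunked decoding prepend the trailing frames of the
+    previous chunk in place of zero padding.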
+ """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._padding = ( + self.padding[2], + self.padding[2], + self.padding[1], + self.padding[1], + 2 * self.padding[0], + 0, + ) + self.padding = (0, 0, 0) + + def forward(self, x, cache_x=None): + padding = list(self._padding) + if cache_x is not None and self._padding[4] > 0: + cache_x = cache_x.to(x.device) + x = torch.cat([cache_x, x], dim=2) + padding[4] -= cache_x.shape[2] + x = F.pad(x, padding) + + return super().forward(x) + + +class RMS_norm(nn.Module): + + def __init__(self, dim, channel_first=True, images=True, bias=False): + super().__init__() + broadcastable_dims = (1, 1, 1) if not images else (1, 1) + shape = (dim, *broadcastable_dims) if channel_first else (dim,) + + self.channel_first = channel_first + self.scale = dim**0.5 + self.gamma = nn.Parameter(torch.ones(shape)) + self.bias = nn.Parameter(torch.zeros(shape)) if bias else 0.0 + + def forward(self, x): + return (F.normalize(x, dim=(1 if self.channel_first else -1)) * + self.scale * self.gamma + self.bias) + + +class Upsample(nn.Upsample): + + def forward(self, x): + """ + Fix bfloat16 support for nearest neighbor interpolation. + """ + return super().forward(x.float()).type_as(x) + + +class Resample(nn.Module): + + def __init__(self, dim, mode): + assert mode in ( + "none", + "upsample2d", + "upsample3d", + "downsample2d", + "downsample3d", + ) + super().__init__() + self.dim = dim + self.mode = mode + + # layers + if mode == "upsample2d": + self.resample = nn.Sequential( + Upsample(scale_factor=(2.0, 2.0), mode="nearest-exact"), + nn.Conv2d(dim, dim, 3, padding=1), + ) + elif mode == "upsample3d": + self.resample = nn.Sequential( + Upsample(scale_factor=(2.0, 2.0), mode="nearest-exact"), + nn.Conv2d(dim, dim, 3, padding=1), + # nn.Conv2d(dim, dim//2, 3, padding=1) + ) + self.time_conv = CausalConv3d( + dim, dim * 2, (3, 1, 1), padding=(1, 0, 0)) + elif mode == "downsample2d": + self.resample = nn.Sequential( + nn.ZeroPad2d((0, 1, 0, 1)), + nn.Conv2d(dim, dim, 3, stride=(2, 2))) + elif mode == "downsample3d": + self.resample = nn.Sequential( + nn.ZeroPad2d((0, 1, 0, 1)), + nn.Conv2d(dim, dim, 3, stride=(2, 2))) + self.time_conv = CausalConv3d( + dim, dim, (3, 1, 1), stride=(2, 1, 1), padding=(0, 0, 0)) + else: + self.resample = nn.Identity() + + def forward(self, x, feat_cache=None, feat_idx=[0]): + b, c, t, h, w = x.size() + if self.mode == "upsample3d": + if feat_cache is not None: + idx = feat_idx[0] + if feat_cache[idx] is None: + feat_cache[idx] = "Rep" + feat_idx[0] += 1 + else: + cache_x = x[:, :, -CACHE_T:, :, :].clone() + if (cache_x.shape[2] < 2 and feat_cache[idx] is not None and + feat_cache[idx] != "Rep"): + # cache last frame of last two chunk + cache_x = torch.cat( + [ + feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to( + cache_x.device), + cache_x, + ], + dim=2, + ) + if (cache_x.shape[2] < 2 and feat_cache[idx] is not None and + feat_cache[idx] == "Rep"): + cache_x = torch.cat( + [ + torch.zeros_like(cache_x).to(cache_x.device), + cache_x + ], + dim=2, + ) + if feat_cache[idx] == "Rep": + x = self.time_conv(x) + else: + x = self.time_conv(x, feat_cache[idx]) + feat_cache[idx] = cache_x + feat_idx[0] += 1 + x = x.reshape(b, 2, c, t, h, w) + x = torch.stack((x[:, 0, :, :, :, :], x[:, 1, :, :, :, :]), + 3) + x = x.reshape(b, c, t * 2, h, w) + t = x.shape[2] + x = rearrange(x, "b c t h w -> (b t) c h w") + x = self.resample(x) + x = rearrange(x, "(b t) c h w -> b c t h w", t=t) + + if self.mode == "downsample3d": + if 
feat_cache is not None: + idx = feat_idx[0] + if feat_cache[idx] is None: + feat_cache[idx] = x.clone() + feat_idx[0] += 1 + else: + cache_x = x[:, :, -1:, :, :].clone() + x = self.time_conv( + torch.cat([feat_cache[idx][:, :, -1:, :, :], x], 2)) + feat_cache[idx] = cache_x + feat_idx[0] += 1 + return x + + def init_weight(self, conv): + conv_weight = conv.weight.detach().clone() + nn.init.zeros_(conv_weight) + c1, c2, t, h, w = conv_weight.size() + one_matrix = torch.eye(c1, c2) + init_matrix = one_matrix + nn.init.zeros_(conv_weight) + conv_weight.data[:, :, 1, 0, 0] = init_matrix # * 0.5 + conv.weight = nn.Parameter(conv_weight) + nn.init.zeros_(conv.bias.data) + + def init_weight2(self, conv): + conv_weight = conv.weight.data.detach().clone() + nn.init.zeros_(conv_weight) + c1, c2, t, h, w = conv_weight.size() + init_matrix = torch.eye(c1 // 2, c2) + conv_weight[:c1 // 2, :, -1, 0, 0] = init_matrix + conv_weight[c1 // 2:, :, -1, 0, 0] = init_matrix + conv.weight = nn.Parameter(conv_weight) + nn.init.zeros_(conv.bias.data) + + +class ResidualBlock(nn.Module): + + def __init__(self, in_dim, out_dim, dropout=0.0): + super().__init__() + self.in_dim = in_dim + self.out_dim = out_dim + + # layers + self.residual = nn.Sequential( + RMS_norm(in_dim, images=False), + nn.SiLU(), + CausalConv3d(in_dim, out_dim, 3, padding=1), + RMS_norm(out_dim, images=False), + nn.SiLU(), + nn.Dropout(dropout), + CausalConv3d(out_dim, out_dim, 3, padding=1), + ) + self.shortcut = ( + CausalConv3d(in_dim, out_dim, 1) + if in_dim != out_dim else nn.Identity()) + + def forward(self, x, feat_cache=None, feat_idx=[0]): + h = self.shortcut(x) + for layer in self.residual: + if isinstance(layer, CausalConv3d) and feat_cache is not None: + idx = feat_idx[0] + cache_x = x[:, :, -CACHE_T:, :, :].clone() + if cache_x.shape[2] < 2 and feat_cache[idx] is not None: + # cache last frame of last two chunk + cache_x = torch.cat( + [ + feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to( + cache_x.device), + cache_x, + ], + dim=2, + ) + x = layer(x, feat_cache[idx]) + feat_cache[idx] = cache_x + feat_idx[0] += 1 + else: + x = layer(x) + return x + h + + +class AttentionBlock(nn.Module): + """ + Causal self-attention with a single head. 
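+
+    The attention itself is spatial: the input is first rearranged to
+    (b * t, c, h, w), so each frame attends only over its own pixels.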
+ """ + + def __init__(self, dim): + super().__init__() + self.dim = dim + + # layers + self.norm = RMS_norm(dim) + self.to_qkv = nn.Conv2d(dim, dim * 3, 1) + self.proj = nn.Conv2d(dim, dim, 1) + + # zero out the last layer params + nn.init.zeros_(self.proj.weight) + + def forward(self, x): + identity = x + b, c, t, h, w = x.size() + x = rearrange(x, "b c t h w -> (b t) c h w") + x = self.norm(x) + # compute query, key, value + q, k, v = ( + self.to_qkv(x).reshape(b * t, 1, c * 3, + -1).permute(0, 1, 3, + 2).contiguous().chunk(3, dim=-1)) + + # apply attention + x = F.scaled_dot_product_attention( + q, + k, + v, + ) + x = x.squeeze(1).permute(0, 2, 1).reshape(b * t, c, h, w) + + # output + x = self.proj(x) + x = rearrange(x, "(b t) c h w-> b c t h w", t=t) + return x + identity + + +def patchify(x, patch_size): + if patch_size == 1: + return x + if x.dim() == 4: + x = rearrange( + x, "b c (h q) (w r) -> b (c r q) h w", q=patch_size, r=patch_size) + elif x.dim() == 5: + x = rearrange( + x, + "b c f (h q) (w r) -> b (c r q) f h w", + q=patch_size, + r=patch_size, + ) + else: + raise ValueError(f"Invalid input shape: {x.shape}") + + return x + + +def unpatchify(x, patch_size): + if patch_size == 1: + return x + + if x.dim() == 4: + x = rearrange( + x, "b (c r q) h w -> b c (h q) (w r)", q=patch_size, r=patch_size) + elif x.dim() == 5: + x = rearrange( + x, + "b (c r q) f h w -> b c f (h q) (w r)", + q=patch_size, + r=patch_size, + ) + return x + + +class AvgDown3D(nn.Module): + + def __init__( + self, + in_channels, + out_channels, + factor_t, + factor_s=1, + ): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.factor_t = factor_t + self.factor_s = factor_s + self.factor = self.factor_t * self.factor_s * self.factor_s + + assert in_channels * self.factor % out_channels == 0 + self.group_size = in_channels * self.factor // out_channels + + def forward(self, x: torch.Tensor) -> torch.Tensor: + pad_t = (self.factor_t - x.shape[2] % self.factor_t) % self.factor_t + pad = (0, 0, 0, 0, pad_t, 0) + x = F.pad(x, pad) + B, C, T, H, W = x.shape + x = x.view( + B, + C, + T // self.factor_t, + self.factor_t, + H // self.factor_s, + self.factor_s, + W // self.factor_s, + self.factor_s, + ) + x = x.permute(0, 1, 3, 5, 7, 2, 4, 6).contiguous() + x = x.view( + B, + C * self.factor, + T // self.factor_t, + H // self.factor_s, + W // self.factor_s, + ) + x = x.view( + B, + self.out_channels, + self.group_size, + T // self.factor_t, + H // self.factor_s, + W // self.factor_s, + ) + x = x.mean(dim=2) + return x + + +class DupUp3D(nn.Module): + + def __init__( + self, + in_channels: int, + out_channels: int, + factor_t, + factor_s=1, + ): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + + self.factor_t = factor_t + self.factor_s = factor_s + self.factor = self.factor_t * self.factor_s * self.factor_s + + assert out_channels * self.factor % in_channels == 0 + self.repeats = out_channels * self.factor // in_channels + + def forward(self, x: torch.Tensor, first_chunk=False) -> torch.Tensor: + x = x.repeat_interleave(self.repeats, dim=1) + x = x.view( + x.size(0), + self.out_channels, + self.factor_t, + self.factor_s, + self.factor_s, + x.size(2), + x.size(3), + x.size(4), + ) + x = x.permute(0, 1, 5, 2, 6, 3, 7, 4).contiguous() + x = x.view( + x.size(0), + self.out_channels, + x.size(2) * self.factor_t, + x.size(4) * self.factor_s, + x.size(6) * self.factor_s, + ) + if first_chunk: + x = x[:, :, self.factor_t - 1:, :, :] + return 
x + + +class Down_ResidualBlock(nn.Module): + + def __init__(self, + in_dim, + out_dim, + dropout, + mult, + temperal_downsample=False, + down_flag=False): + super().__init__() + + # Shortcut path with downsample + self.avg_shortcut = AvgDown3D( + in_dim, + out_dim, + factor_t=2 if temperal_downsample else 1, + factor_s=2 if down_flag else 1, + ) + + # Main path with residual blocks and downsample + downsamples = [] + for _ in range(mult): + downsamples.append(ResidualBlock(in_dim, out_dim, dropout)) + in_dim = out_dim + + # Add the final downsample block + if down_flag: + mode = "downsample3d" if temperal_downsample else "downsample2d" + downsamples.append(Resample(out_dim, mode=mode)) + + self.downsamples = nn.Sequential(*downsamples) + + def forward(self, x, feat_cache=None, feat_idx=[0]): + x_copy = x.clone() + for module in self.downsamples: + x = module(x, feat_cache, feat_idx) + + return x + self.avg_shortcut(x_copy) + + +class Up_ResidualBlock(nn.Module): + + def __init__(self, + in_dim, + out_dim, + dropout, + mult, + temperal_upsample=False, + up_flag=False): + super().__init__() + # Shortcut path with upsample + if up_flag: + self.avg_shortcut = DupUp3D( + in_dim, + out_dim, + factor_t=2 if temperal_upsample else 1, + factor_s=2 if up_flag else 1, + ) + else: + self.avg_shortcut = None + + # Main path with residual blocks and upsample + upsamples = [] + for _ in range(mult): + upsamples.append(ResidualBlock(in_dim, out_dim, dropout)) + in_dim = out_dim + + # Add the final upsample block + if up_flag: + mode = "upsample3d" if temperal_upsample else "upsample2d" + upsamples.append(Resample(out_dim, mode=mode)) + + self.upsamples = nn.Sequential(*upsamples) + + def forward(self, x, feat_cache=None, feat_idx=[0], first_chunk=False): + x_main = x.clone() + for module in self.upsamples: + x_main = module(x_main, feat_cache, feat_idx) + if self.avg_shortcut is not None: + x_shortcut = self.avg_shortcut(x, first_chunk) + return x_main + x_shortcut + else: + return x_main + + +class Encoder3d(nn.Module): + + def __init__( + self, + dim=128, + z_dim=4, + dim_mult=[1, 2, 4, 4], + num_res_blocks=2, + attn_scales=[], + temperal_downsample=[True, True, False], + dropout=0.0, + ): + super().__init__() + self.dim = dim + self.z_dim = z_dim + self.dim_mult = dim_mult + self.num_res_blocks = num_res_blocks + self.attn_scales = attn_scales + self.temperal_downsample = temperal_downsample + + # dimensions + dims = [dim * u for u in [1] + dim_mult] + scale = 1.0 + + # init block + self.conv1 = CausalConv3d(12, dims[0], 3, padding=1) + + # downsample blocks + downsamples = [] + for i, (in_dim, out_dim) in enumerate(zip(dims[:-1], dims[1:])): + t_down_flag = ( + temperal_downsample[i] + if i < len(temperal_downsample) else False) + downsamples.append( + Down_ResidualBlock( + in_dim=in_dim, + out_dim=out_dim, + dropout=dropout, + mult=num_res_blocks, + temperal_downsample=t_down_flag, + down_flag=i != len(dim_mult) - 1, + )) + scale /= 2.0 + self.downsamples = nn.Sequential(*downsamples) + + # middle blocks + self.middle = nn.Sequential( + ResidualBlock(out_dim, out_dim, dropout), + AttentionBlock(out_dim), + ResidualBlock(out_dim, out_dim, dropout), + ) + + # # output blocks + self.head = nn.Sequential( + RMS_norm(out_dim, images=False), + nn.SiLU(), + CausalConv3d(out_dim, z_dim, 3, padding=1), + ) + + def forward(self, x, feat_cache=None, feat_idx=[0]): + + if feat_cache is not None: + idx = feat_idx[0] + cache_x = x[:, :, -CACHE_T:, :, :].clone() + if cache_x.shape[2] < 2 and feat_cache[idx] is not 
None: + cache_x = torch.cat( + [ + feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to( + cache_x.device), + cache_x, + ], + dim=2, + ) + x = self.conv1(x, feat_cache[idx]) + feat_cache[idx] = cache_x + feat_idx[0] += 1 + else: + x = self.conv1(x) + + ## downsamples + for layer in self.downsamples: + if feat_cache is not None: + x = layer(x, feat_cache, feat_idx) + else: + x = layer(x) + + ## middle + for layer in self.middle: + if isinstance(layer, ResidualBlock) and feat_cache is not None: + x = layer(x, feat_cache, feat_idx) + else: + x = layer(x) + + ## head + for layer in self.head: + if isinstance(layer, CausalConv3d) and feat_cache is not None: + idx = feat_idx[0] + cache_x = x[:, :, -CACHE_T:, :, :].clone() + if cache_x.shape[2] < 2 and feat_cache[idx] is not None: + cache_x = torch.cat( + [ + feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to( + cache_x.device), + cache_x, + ], + dim=2, + ) + x = layer(x, feat_cache[idx]) + feat_cache[idx] = cache_x + feat_idx[0] += 1 + else: + x = layer(x) + + return x + + +class Decoder3d(nn.Module): + + def __init__( + self, + dim=128, + z_dim=4, + dim_mult=[1, 2, 4, 4], + num_res_blocks=2, + attn_scales=[], + temperal_upsample=[False, True, True], + dropout=0.0, + ): + super().__init__() + self.dim = dim + self.z_dim = z_dim + self.dim_mult = dim_mult + self.num_res_blocks = num_res_blocks + self.attn_scales = attn_scales + self.temperal_upsample = temperal_upsample + + # dimensions + dims = [dim * u for u in [dim_mult[-1]] + dim_mult[::-1]] + scale = 1.0 / 2**(len(dim_mult) - 2) + # init block + self.conv1 = CausalConv3d(z_dim, dims[0], 3, padding=1) + + # middle blocks + self.middle = nn.Sequential( + ResidualBlock(dims[0], dims[0], dropout), + AttentionBlock(dims[0]), + ResidualBlock(dims[0], dims[0], dropout), + ) + + # upsample blocks + upsamples = [] + for i, (in_dim, out_dim) in enumerate(zip(dims[:-1], dims[1:])): + t_up_flag = temperal_upsample[i] if i < len( + temperal_upsample) else False + upsamples.append( + Up_ResidualBlock( + in_dim=in_dim, + out_dim=out_dim, + dropout=dropout, + mult=num_res_blocks + 1, + temperal_upsample=t_up_flag, + up_flag=i != len(dim_mult) - 1, + )) + self.upsamples = nn.Sequential(*upsamples) + + # output blocks + self.head = nn.Sequential( + RMS_norm(out_dim, images=False), + nn.SiLU(), + CausalConv3d(out_dim, 12, 3, padding=1), + ) + + def forward(self, x, feat_cache=None, feat_idx=[0], first_chunk=False): + if feat_cache is not None: + idx = feat_idx[0] + cache_x = x[:, :, -CACHE_T:, :, :].clone() + if cache_x.shape[2] < 2 and feat_cache[idx] is not None: + cache_x = torch.cat( + [ + feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to( + cache_x.device), + cache_x, + ], + dim=2, + ) + x = self.conv1(x, feat_cache[idx]) + feat_cache[idx] = cache_x + feat_idx[0] += 1 + else: + x = self.conv1(x) + + for layer in self.middle: + if isinstance(layer, ResidualBlock) and feat_cache is not None: + x = layer(x, feat_cache, feat_idx) + else: + x = layer(x) + + ## upsamples + for layer in self.upsamples: + if feat_cache is not None: + x = layer(x, feat_cache, feat_idx, first_chunk) + else: + x = layer(x) + + ## head + for layer in self.head: + if isinstance(layer, CausalConv3d) and feat_cache is not None: + idx = feat_idx[0] + cache_x = x[:, :, -CACHE_T:, :, :].clone() + if cache_x.shape[2] < 2 and feat_cache[idx] is not None: + cache_x = torch.cat( + [ + feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to( + cache_x.device), + cache_x, + ], + dim=2, + ) + x = layer(x, feat_cache[idx]) + feat_cache[idx] = cache_x + feat_idx[0] 
+= 1
+            else:
+                x = layer(x)
+        return x
+
+
+def count_conv3d(model):
+    count = 0
+    for m in model.modules():
+        if isinstance(m, CausalConv3d):
+            count += 1
+    return count
+
+
+class WanVAE_(nn.Module):
+
+    def __init__(
+        self,
+        dim=160,
+        dec_dim=256,
+        z_dim=16,
+        dim_mult=[1, 2, 4, 4],
+        num_res_blocks=2,
+        attn_scales=[],
+        temperal_downsample=[True, True, False],
+        dropout=0.0,
+    ):
+        super().__init__()
+        self.dim = dim
+        self.z_dim = z_dim
+        self.dim_mult = dim_mult
+        self.num_res_blocks = num_res_blocks
+        self.attn_scales = attn_scales
+        self.temperal_downsample = temperal_downsample
+        self.temperal_upsample = temperal_downsample[::-1]
+
+        # modules
+        self.encoder = Encoder3d(
+            dim,
+            z_dim * 2,
+            dim_mult,
+            num_res_blocks,
+            attn_scales,
+            self.temperal_downsample,
+            dropout,
+        )
+        self.conv1 = CausalConv3d(z_dim * 2, z_dim * 2, 1)
+        self.conv2 = CausalConv3d(z_dim, z_dim, 1)
+        self.decoder = Decoder3d(
+            dec_dim,
+            z_dim,
+            dim_mult,
+            num_res_blocks,
+            attn_scales,
+            self.temperal_upsample,
+            dropout,
+        )
+
+    def forward(self, x, scale=[0, 1]):
+        mu = self.encode(x, scale)
+        x_recon = self.decode(mu, scale)
+        return x_recon, mu
+
+    def encode(self, x, scale):
+        self.clear_cache()
+        x = patchify(x, patch_size=2)
+        t = x.shape[2]
+        iter_ = 1 + (t - 1) // 4
+        # Encode causally in chunks (frame 0 alone, then groups of 4 frames),
+        # carrying the per-conv feature cache across chunk boundaries.
+        for i in range(iter_):
+            self._enc_conv_idx = [0]
+            if i == 0:
+                out = self.encoder(
+                    x[:, :, :1, :, :],
+                    feat_cache=self._enc_feat_map,
+                    feat_idx=self._enc_conv_idx,
+                )
+            else:
+                out_ = self.encoder(
+                    x[:, :, 1 + 4 * (i - 1):1 + 4 * i, :, :],
+                    feat_cache=self._enc_feat_map,
+                    feat_idx=self._enc_conv_idx,
+                )
+                out = torch.cat([out, out_], 2)
+        mu, log_var = self.conv1(out).chunk(2, dim=1)
+        if isinstance(scale[0], torch.Tensor):
+            mu = (mu - scale[0].view(1, self.z_dim, 1, 1, 1)) * scale[1].view(
+                1, self.z_dim, 1, 1, 1)
+        else:
+            mu = (mu - scale[0]) * scale[1]
+        self.clear_cache()
+        return mu
+
+    def decode(self, z, scale):
+        self.clear_cache()
+        if isinstance(scale[0], torch.Tensor):
+            z = z / scale[1].view(1, self.z_dim, 1, 1, 1) + scale[0].view(
+                1, self.z_dim, 1, 1, 1)
+        else:
+            z = z / scale[1] + scale[0]
+        iter_ = z.shape[2]
+        x = self.conv2(z)
+        # Decode one latent frame at a time, reusing the causal feature cache.
+        for i in range(iter_):
+            self._conv_idx = [0]
+            if i == 0:
+                out = self.decoder(
+                    x[:, :, i:i + 1, :, :],
+                    feat_cache=self._feat_map,
+                    feat_idx=self._conv_idx,
+                    first_chunk=True,
+                )
+            else:
+                out_ = self.decoder(
+                    x[:, :, i:i + 1, :, :],
+                    feat_cache=self._feat_map,
+                    feat_idx=self._conv_idx,
+                )
+                out = torch.cat([out, out_], 2)
+        out = unpatchify(out, patch_size=2)
+        self.clear_cache()
+        return out
+
+    def reparameterize(self, mu, log_var):
+        std = torch.exp(0.5 * log_var)
+        eps = torch.randn_like(std)
+        return eps * std + mu
+
+    def sample(self, imgs, deterministic=False):
+        # `encode` returns only the normalized mean, so draw the raw
+        # posterior (mu, log_var) from the encoder head directly.
+        x = patchify(imgs, patch_size=2)
+        mu, log_var = self.conv1(self.encoder(x)).chunk(2, dim=1)
+        if deterministic:
+            return mu
+        std = torch.exp(0.5 * log_var.clamp(-30.0, 20.0))
+        return mu + std * torch.randn_like(std)
+
+    def clear_cache(self):
+        self._conv_num = count_conv3d(self.decoder)
+        self._conv_idx = [0]
+        self._feat_map = [None] * self._conv_num
+        # cache encode
+        self._enc_conv_num = count_conv3d(self.encoder)
+        self._enc_conv_idx = [0]
+        self._enc_feat_map = [None] * self._enc_conv_num
+
+
+def _video_vae(pretrained_path=None, z_dim=16, dim=160, device="cpu", **kwargs):
+    # params
+    cfg = dict(
+        dim=dim,
+        z_dim=z_dim,
+        dim_mult=[1, 2, 4, 4],
+        num_res_blocks=2,
+        attn_scales=[],
+        temperal_downsample=[True, True, True],
+        dropout=0.0,
+    )
+    cfg.update(**kwargs)
+
+    # init model
+    with torch.device("meta"):
+        model = WanVAE_(**cfg)
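+
+    # NOTE: parameters are created on the meta device above and hold no real
+    # storage; `assign=True` in the load below makes load_state_dict adopt
+    # the checkpoint tensors directly instead of copying into empty buffers.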
+
+    # load checkpoint
+    logging.info(f"loading {pretrained_path}")
+    model.load_state_dict(
+        torch.load(pretrained_path, map_location=device), assign=True)
+
+    return model
+
+
+class Wan2_2_VAE:
+
+    def __init__(
+        self,
+        z_dim=48,
+        c_dim=160,
+        vae_pth=None,
+        dim_mult=[1, 2, 4, 4],
+        temperal_downsample=[False, True, True],
+        dtype=torch.float,
+        device="cuda",
+    ):
+        self.dtype = dtype
+        self.device = device
+
+        # Per-channel statistics of the 48-dim latent space; latents are
+        # normalized as (z - mean) / std on encode and inverted on decode.
+        mean = torch.tensor(
+            [
+                -0.2289, -0.0052, -0.1323, -0.2339, -0.2799, 0.0174,
+                0.1838, 0.1557, -0.1382, 0.0542, 0.2813, 0.0891,
+                0.1570, -0.0098, 0.0375, -0.1825, -0.2246, -0.1207,
+                -0.0698, 0.5109, 0.2665, -0.2108, -0.2158, 0.2502,
+                -0.2055, -0.0322, 0.1109, 0.1567, -0.0729, 0.0899,
+                -0.2799, -0.1230, -0.0313, -0.1649, 0.0117, 0.0723,
+                -0.2839, -0.2083, -0.0520, 0.3748, 0.0152, 0.1957,
+                0.1433, -0.2944, 0.3573, -0.0548, -0.1681, -0.0667,
+            ],
+            dtype=dtype,
+            device=device,
+        )
+        std = torch.tensor(
+            [
+                0.4765, 1.0364, 0.4514, 1.1677, 0.5313, 0.4990,
+                0.4818, 0.5013, 0.8158, 1.0344, 0.5894, 1.0901,
+                0.6885, 0.6165, 0.8454, 0.4978, 0.5759, 0.3523,
+                0.7135, 0.6804, 0.5833, 1.4146, 0.8986, 0.5659,
+                0.7069, 0.5338, 0.4889, 0.4917, 0.4069, 0.4999,
+                0.6866, 0.4093, 0.5709, 0.6065, 0.6415, 0.4944,
+                0.5726, 1.2042, 0.5458, 1.6887, 0.3971, 1.0600,
+                0.3943, 0.5537, 0.5444, 0.4089, 0.7468, 0.7744,
+            ],
+            dtype=dtype,
+            device=device,
+        )
+        self.scale = [mean, 1.0 / std]
+
+        # init model
+        self.model = (
+            _video_vae(
+                pretrained_path=vae_pth,
+                z_dim=z_dim,
+                dim=c_dim,
+                dim_mult=dim_mult,
+                temperal_downsample=temperal_downsample,
+            ).eval().requires_grad_(False).to(device))
+
+    def encode(self, videos):
+        with torch.amp.autocast("cuda", dtype=self.dtype):
+            return self.model.encode(videos, self.scale)
+
+    def decode(self, zs):
+        # `zs` must be a list of per-sample latents.
+        if not isinstance(zs, list):
+            raise TypeError("zs should be a list")
+        with torch.amp.autocast("cuda", dtype=self.dtype):
+            return [
+                self.model.decode(u.unsqueeze(0),
+                                  self.scale).float().clamp_(-1, 1).squeeze(0)
+                for u in zs
+            ]
diff --git a/lightx2v/models/networks/motus/wan_model.py b/lightx2v/models/networks/motus/wan_model.py
new file mode 100644
index 000000000..2ea6d36b1
--- /dev/null
+++ b/lightx2v/models/networks/motus/wan_model.py
@@ -0,0 +1,79 @@
+import json
+import logging
+import os
+from typing import Any, Dict, List, Optional
+
+import torch
+import torch.nn as nn
+
+from lightx2v.models.networks.motus.primitives import sinusoidal_embedding_1d
+from lightx2v.models.networks.motus.wan.model import WanModel
+from lightx2v.models.networks.motus.wan.vae2_2 import Wan2_2_VAE
+
+try:
+    from safetensors.torch import load_file as safe_load_file
+except Exception:
+    safe_load_file = None
+
+logger = logging.getLogger(__name__)
+
+
+def _strip_known_prefixes_for_wan(sd: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
+    # Joint Motus checkpoints may prefix DiT weights with "dit."; strip that
+    # prefix so they load into the bare WanModel.
+    if not isinstance(sd, dict):
+        return sd
+    if not any(key.startswith("dit.") for key in sd.keys()):
+        return sd
+    return {(key[4:] if key.startswith("dit.") else key): value for key, value in sd.items()}
+
+
+class WanVideoModel(nn.Module):
+    def __init__(self, model_config: Dict[str, Any], vae_path: str, device: str = "cuda", precision: str = "bfloat16"):
+        super().__init__()
+        self.device = torch.device(device)
+        self.precision = {"float32": torch.float32, "float16": torch.float16, "bfloat16": 
torch.bfloat16}[precision] + self.wan_model = WanModel(**model_config) + self.wan_model.to(device=self.device, dtype=self.precision) + self.vae = Wan2_2_VAE(vae_pth=vae_path, device=self.device) + + def encode_video(self, video_pixels: torch.Tensor) -> torch.Tensor: + with torch.no_grad(): + return self.vae.encode(video_pixels) + + def decode_video(self, video_latents: torch.Tensor) -> torch.Tensor: + with torch.no_grad(): + return torch.stack([self.vae.decode([video_latents[i]])[0] for i in range(video_latents.shape[0])], dim=0) + + @classmethod + def from_config(cls, config_path: str, vae_path: str, device: str = "cuda", precision: str = "bfloat16"): + config_json_path = os.path.join(config_path, "config.json") + with open(config_json_path, "r") as file: + model_config = json.load(file) + return cls(model_config=model_config, vae_path=vae_path, device=device, precision=precision) + + @classmethod + def from_pretrained(cls, checkpoint_path: str, vae_path: str, config_path: Optional[str] = None, device: str = "cuda", precision: str = "bfloat16"): + config_path = config_path or checkpoint_path + config_json_path = os.path.join(config_path, "config.json") + with open(config_json_path, "r") as file: + model_config = json.load(file) + model = cls(model_config=model_config, vae_path=vae_path, device=device, precision=precision) + + if checkpoint_path.endswith(".pt"): + loaded = torch.load(checkpoint_path, map_location="cpu") + state_dict = loaded["model"] if isinstance(loaded, dict) and "model" in loaded else loaded + elif checkpoint_path.endswith(".bin") or checkpoint_path.endswith(".safetensors"): + if checkpoint_path.endswith(".safetensors"): + if safe_load_file is None: + raise RuntimeError("safetensors is not installed") + state_dict = safe_load_file(checkpoint_path, device="cpu") + else: + loaded = torch.load(checkpoint_path, map_location="cpu") + state_dict = loaded.get("state_dict", loaded.get("model", loaded)) if isinstance(loaded, dict) else loaded + else: + loaded_model = WanModel.from_pretrained(checkpoint_path) + model.wan_model.load_state_dict(loaded_model.state_dict(), strict=False) + return model + + state_dict = _strip_known_prefixes_for_wan(state_dict) + model.wan_model.load_state_dict(state_dict, strict=False) + return model diff --git a/lightx2v/models/runners/motus/__init__.py b/lightx2v/models/runners/motus/__init__.py new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/lightx2v/models/runners/motus/__init__.py @@ -0,0 +1 @@ + diff --git a/lightx2v/models/runners/motus/motus_runner.py b/lightx2v/models/runners/motus/motus_runner.py new file mode 100644 index 000000000..022e9d4bd --- /dev/null +++ b/lightx2v/models/runners/motus/motus_runner.py @@ -0,0 +1,95 @@ +import json +from pathlib import Path + +import numpy as np +import torch +from loguru import logger + +from lightx2v.models.networks.motus.model import MotusModel +from lightx2v.models.runners.base_runner import BaseRunner +from lightx2v.utils.registry_factory import RUNNER_REGISTER +from lightx2v.utils.utils import save_to_video +from lightx2v.server.metrics import monitor_cli +from lightx2v.utils.profiler import * +from lightx2v_platform.base.global_var import AI_DEVICE + + +@RUNNER_REGISTER("motus") +class MotusRunner(BaseRunner): + def __init__(self, config): + super().__init__(config) + self.device = torch.device(AI_DEVICE) + self.adapter = None + + @ProfilingContext4DebugL2("Load models") + def init_modules(self): + self.adapter = MotusModel(self.config, self.device) + + def 
_load_state_value(self, state_path: str): + state_path = str(Path(state_path).expanduser().resolve()) + suffix = Path(state_path).suffix.lower() + if suffix == ".npy": + return np.load(state_path) + if suffix in [".pt", ".pth"]: + value = torch.load(state_path, map_location="cpu") + if isinstance(value, dict): + for key in ["state", "qpos", "joint_state", "initial_state"]: + if key in value: + return value[key] + return value + if suffix == ".json": + with open(state_path, "r") as f: + value = json.load(f) + if isinstance(value, dict): + for key in ["state", "qpos", "joint_state", "initial_state"]: + if key in value: + return value[key] + return value + if suffix in [".txt", ".csv"]: + text = Path(state_path).read_text().strip().replace("\n", ",") + return [float(item) for item in text.split(",") if item.strip()] + raise ValueError(f"Unsupported state file format: {state_path}") + + def _resolve_action_output_path(self): + if self.input_info.save_action_path: + return str(Path(self.input_info.save_action_path).expanduser().resolve()) + return str(Path(self.input_info.save_result_path).expanduser().resolve().with_suffix(".actions.json")) + + def _save_outputs(self, pred_frames: torch.Tensor, pred_actions: torch.Tensor): + video_path = str(Path(self.input_info.save_result_path).expanduser().resolve()) + action_path = self._resolve_action_output_path() + + video = pred_frames.clamp(0, 1).permute(0, 2, 3, 1).contiguous() + save_to_video(video, video_path, fps=float(self.config.get("fps", 4)), method="ffmpeg") + + Path(action_path).parent.mkdir(parents=True, exist_ok=True) + with open(action_path, "w") as f: + json.dump(pred_actions.detach().cpu().float().tolist(), f, ensure_ascii=False, indent=2) + + logger.info(f"Saved Motus video to {video_path}") + logger.info(f"Saved Motus actions to {action_path}") + + @ProfilingContext4DebugL1("RUN pipeline", recorder_mode=GET_RECORDER_MODE(), metrics_func=monitor_cli.lightx2v_worker_request_duration, metrics_labels=["MotusRunner"]) + def run_pipeline(self, input_info): + self.input_info = input_info + if self.adapter is None: + self.init_modules() + + if not self.input_info.image_path: + raise ValueError("Motus requires `image_path`.") + if not self.input_info.state_path: + raise ValueError("Motus requires `state_path`.") + if not self.input_info.prompt: + raise ValueError("Motus requires `prompt`.") + if not self.input_info.save_result_path: + raise ValueError("Motus requires `save_result_path`.") + + state_value = self._load_state_value(self.input_info.state_path) + pred_frames, pred_actions = self.adapter.infer( + image_path=self.input_info.image_path, + prompt=self.input_info.prompt, + state_value=state_value, + num_inference_steps=int(self.config.get("num_inference_steps", 10)), + ) + self._save_outputs(pred_frames, pred_actions) + diff --git a/lightx2v/models/schedulers/motus/__init__.py b/lightx2v/models/schedulers/motus/__init__.py new file mode 100644 index 000000000..f20aaf60b --- /dev/null +++ b/lightx2v/models/schedulers/motus/__init__.py @@ -0,0 +1,3 @@ +from .scheduler import MotusScheduler + +__all__ = ["MotusScheduler"] diff --git a/lightx2v/models/schedulers/motus/scheduler.py b/lightx2v/models/schedulers/motus/scheduler.py new file mode 100644 index 000000000..faf198ada --- /dev/null +++ b/lightx2v/models/schedulers/motus/scheduler.py @@ -0,0 +1,39 @@ +import torch + +from lightx2v.models.schedulers.scheduler import BaseScheduler + + +class MotusScheduler(BaseScheduler): + def __init__(self, config): + super().__init__(config) + 
self.video_latents = None + self.action_latents = None + self.timesteps = None + + def prepare(self, seed, condition_frame_latent, action_shape, dtype, device): + batch, channels, _, latent_h, latent_w = condition_frame_latent.shape + total_latent_frames = 1 + self.config["num_video_frames"] // 4 + generator = None if seed is None else torch.Generator(device=device).manual_seed(seed) + + self.video_latents = torch.randn( + (batch, channels, total_latent_frames, latent_h, latent_w), + device=device, + dtype=dtype, + generator=generator, + ) + self.video_latents[:, :, 0:1] = condition_frame_latent + self.action_latents = torch.randn(action_shape, device=device, dtype=dtype, generator=generator) + self.timesteps = torch.linspace(1.0, 0.0, self.infer_steps + 1, device=device, dtype=dtype) + self.latents = self.video_latents + + def iter_steps(self): + for step_index in range(self.infer_steps): + t = self.timesteps[step_index] + t_next = self.timesteps[step_index + 1] + yield step_index, t, t_next, t_next - t + + def step(self, video_velocity, action_velocity, dt, condition_frame_latent): + self.video_latents = self.video_latents + video_velocity * dt + self.action_latents = self.action_latents + action_velocity * dt + self.video_latents[:, :, 0:1] = condition_frame_latent + self.latents = self.video_latents diff --git a/lightx2v/utils/input_info.py b/lightx2v/utils/input_info.py index ba5e7d64d..65f009614 100755 --- a/lightx2v/utils/input_info.py +++ b/lightx2v/utils/input_info.py @@ -46,6 +46,9 @@ class I2VInputInfo: pose: str = field(default_factory=lambda: None) # Lingbot i2v camera/action conditioning (optional) action_path: str = field(default_factory=str) + # Motus i2v action_expert conditioning (optional) + state_path: str = field(default_factory=str) + save_action_path: str = field(default_factory=str) @dataclass diff --git a/scripts/motus/example_inputs/first_frame.png b/scripts/motus/example_inputs/first_frame.png new file mode 100644 index 0000000000000000000000000000000000000000..1c3c39c5e7e6acc2dd15f1bbe0c55b4a37ce28df GIT binary patch literal 84851 zcmbqaWm8*S*A8wi5Q-KL?pnNPu;5a>xI=MwC%C&62~wa~DNx+qO7Y?nD6Yi{Z|>(W zyk~|F2{V~<_TFo)%TA2Cs{Cv0x7Yvx;I*OxSQ7w1l0^K4U?L;F(R$0S2tWb=6v0y3 zKKaN0?CL2OJxQ_an*W}j@~!*3VgN``u|iOKkV+J08<{mTr4`dpre~Q}FR$<=*t{qz zjyuwK=)Il;GW;$!$}XQ^#vk&z_LgD_SRaLBKjo-8s-!E#F>=H^0yA$LJ>>UrIYB)EzG`~UIb?i5Are#qe| z1`6mAW-1y+sj}-<4bG-(BLON=ld|IjS&%{7MY|?#KetR-%)`t+-uO%eka86h2Xz?|s-pFA97z4aDr@ANZh{%lD;J-K3Jh&U%XlcwF-lj>NX7uy@duV7;`{Cf> zg!HWBV+J_42pOr= zuR|2ow(LqV*g0``eRy~xG=8bKZqT0vHbMXt27$rdT)u^DACxBy#;Hq~rNQ(rQ&#%u z0CeAcB%-unrpXOq3{>fxe!-%Mv@@ZI;WO;S-}`uyNNBwB!Q8TaW}~=wTY2h7F~}_pkg)2Rdzxi8C)> z=Q_oM%(sJ$hn;k6>%xjXaNE?purKfdqHj&Uo38nm5Kn10S<<04!@2?DphAWOpw)AW zO^**A_3h(E&eD-=<&^kA->sa%Ea+cY7loqM=E_GHJSH>A{Kwcn)&h5daT*)fgkt+s zNPhS|CL1O^3i@njk&ML2@GU~(_3i}LC9ASzn+B=quLH%f(K~^|$H^W-`*+vl^-t!t zH-B-xOdL+?WFwF?EM`UMPyxTjgl!5KK%ZhA;(6qT4IHxeri9@}gtFMl%g%(dsNXlv zhFPt7{LjbEP}w+Y3vo~{&j}OytY~-r`$DTa;$%!sDQ;B>Fa`OF54G6M&X`9{{U$1c z*pYie2-IY1m8ST9y9LcK#O$R@$;BWm9|3R;vE_hu{@fPWdh0TOqR z6u3vcZI*KY&a$Wguz>)_LZpyHB3eC%doL>w%Wv+S(;T$B3s#+lI+pPb;F{Rz%U|;& z{iQ!CUf~HH68g5jf*oVyzU)zd;b~`la=fqpr8`j>pca5>Vgix z5+FynM28(PUq;Aeln3hnc00yo5DvZ$LmbDSe*{@vRl0pdFsRr%B9B5WV-!%30j%_( z<#$~@!ddiKfZcQ!*caD#u^&VWM2BTwJHJ5S1ays1QAo%**z2M~FUHzp*u{PA6y#w(c;dW+Bfx5W7WQ z!IPjO@s0lrJ3m0rp?M)PAq(1ZBCo6uj=+Sl)HZ9{dwJ=w$PSTOq}nsYf;2G!8iYua zg%tqPcnt^(^KQA=bSblevGLTy;lqE|7srHFHRQOVWbSU?V!zaLibR3eoXy33JsUv& zE?)eB^d?rz!FVgexiWQ%tGBw=A!Kn>mDpr{KR-^jHN=Vo+%0IBk6^DXs`?4n?9%Fe 
zr*(W_%2BS?=?9L_t;)X4AXaduE2=Oh3rM<_B3Xd(4E~u%kv0CCejW9&tC3;}b8Jba zPWAlJ4YOi?6Kf(8Fy!)gOg|RjQfv_j03Z{DG*5}-S&@OlenQN&YbWes@4nX6Ie#;p z5-IiAiX@iB0zd&=pTa5H3mLZCebMO4zGUrEpK-*$&)@mivvtAxB5;aUQS?1EQpa+? zVOo~T{>|6OhBusnWX~)S!Rd3Era#!Z(dH{!QRV7L+SY^xZx7@_lKl%HpK^V%DExdXriVFT_JZPrnE<_Ed z*vpiWx|a?7B zBCl=FP3=$RIbNxTRicj>%8wXmb_$By*g@MpN4xjN!~qz5qFSw4rH4jNGF{=TQ>%l- zUiL9@Mx7Cfwd7sC`@+h^N-xlZbJ`tq`-K1V!dn0^U0@76`<-&U`hS;xCX3{eQ6U_P zX}|{7>RWff3d%rt7_9bCIUGEofYb?RlBlM*FoDi@7h7W&OU;Me3Q;|KACGpS3G<%! zH@i&FG-vFKbReTcf%w5njfwoeP_Cn0ECN-(>+34xRL^m2x{}3DF)cJIbFHZpQ&Who zZF;b;{nTtg!f90b%Rw=9*EeXZFVem_7V1htZZTp2FvZ~YkBX+SV^_J$Qiss80@QQo zN2Gu2>FH(X zz%r-?m{iGvf2Ds&-4?*xt4eH_#5hV5Rswv+$7IcjUGTG8)q&@u9XV1REz#09|2cjl zw))om-OQt6&aT-Xpx*FVn@W}#SbzpHN8PWz#@xZzK%yz(K?h*YAPZ+1VgS{aD$^Zd z2Krrg^34XFJ{RWeYAO^+Ay>u>@1+Inh64D}?UTYSG4$5{@tC1T`*o)Ys2A3M}gCa2d6#YX+js z9ydnNsC zaoM+Eiq8Vs1oT%8JgbCZE2jcj zJYYc|mac&!EW3azd(WyPXGCQ|GZ1b*7$z~ihP27$@9j8;sc}dSVwK-T<))X$}m_#x5YM{dmhO%S}rd*%vSYS8}%N67s4Gu zKGErFHh3Gk{R-Cr66W95JcW@ZQusf#T{OVjz1gXnYT&8YlUm}X80S3f&xgIgBcep$ z47=O^eBKuOO~)ZqrQr9HYE`}aGH;(w?@D1I#V+~Oa3GTy&jXI-d*h=AMnx2{<``+e zmqk}r$4gAWZrW82ZEqM}r15K={`TfrU)Fb`qIvX+OuPlEOh*+Rgv%!e|k^uUzHD=!;1! zon~OyZB&oz+xgzdKQy{A_W;~_l8z~sjdP)%s6w%{B6Ym)9N$(h8VyA^Id=O=_j*PL z7YvLAt-j5SCBM25^j(2FlrV!z5vYT8UMMlnFw7f%_l`!IRT+?&Pv=nE432Mr#!>(@ z2_0Z>K5?JqiA^UoKMC#5D1mkCE%%j)X5$9>sHFFYUzkwiOTXwC993$bco^8_+SexV zphqtOdL*|o70J&y9C79BMB@8fdQ(-cdeiVdQ)RUV(g-S3%p$VlJE^MXaG7w5G3Kwr zwkuLXF}y>WC`(BUk-_R>HBA0c_JLGSa9IJlSXQJjnLG>kTR;c}{>3dJ2+e&wNWjO) zr_;S>A3ylt(ppKXI%G3nOICm-OmmZh8I5355mhfegXJ|^@nB>~KHDp|-PmHikgQ%7 zJU|De0gAc83{z)uoBG*<|2Ztcl&7po zDo#U#CDIlUB5xr@H*J(#UNS=fG%U{GF*qs#VX%EQ9o(T|Wbmb^)>z37Jz;I(oUWa0 zWZ$XKa++-~m(F!K`3!jN4CCQZMxwyQXQo_K)a1u$(2eMUB(a>J0{D>{xK1D(A2;Z5 z0IZ)4RRbzM&%`~mQwh8LNCsg@T-231>#}=fA|=}H7ow1arcIQlBbR{mrVWEy=ot{FJZ9s&@j<2uKV-tp2_zIGlG6+V)osbhEqreFv(^;JU+N+-ns%k|yn*-7N~kWmKE{@# zl#49zGZKJ(4U=TDal#-EE}YlZWB9#Sk}%#4wf$9(D;!_4`%Xxo=e$DN8jWs~|7@H;JEA9xO*-kH*INv0R4Otn%pwdM zT*>z0Wb1oHJT}_!NgSZi=83?Fs77c^&~;@4ZL0b>1%*9ev(Gzad_;7kO)QtKQBr?q zZ)-@aw>W(KclMpVG>n~yx5GWL1Hfew<%k+yap#KNpQ_1P;|(+?EpBK?^*IouY`nO2-5xu&=i zLWedZg-q|&U6`pUNLjVfLFS_{X3`QU)XhdF768C+B}aqGIrP2mEo;8KA`7R7+7cC2 zdlSg*Nku7Q_mmwaw64f}o)J4zS>5&Nj^53j%^C)z0fF1Ll4vm@DM%^4lqPCOD6{}D zp%qpqwF=wdT>TU)*KD#oaWa5HG!tKgXr4Jklbu+|Mjx4Vk{AP$js$RFZv_FZ(&>0~ z1;J(MW(k7YLNN*MVveCDxWL!6KpHvlZjE~;J{Az?FR#=HlBJHYJ{Z+%sr|KVYr4sO z2;dE}!U2Pi#_u{YUrBH+KOl?&)QEP;?UE}czT}j;B7!o>4Yn{=hHn^AMH~0*1Molu zFZCT3=jGUA3MELGG#G$ik#%vEm41g-I^YN>njKdp4Tvrs9h?%IAwxz)o(4+pJ!btRilYrgM?)$kY??n7(WEIfo6ZVjx|GKF9W{*QWoi2+{EQ>w;0x(w7}oBk z%^g>`&G#{bqmFl>T=+T@Vf>NE9WPbHY8PEkk^)O_S2yfYmQ(bWHZcV z6d(9eTyk+MIyyi0zA&GPdEhL$#c2BhUz#3|6AX~lHBLL9@8iK)ZN)K;MpjaEjoZr^ zl^iVU{T8Pg6bUu1F%e7; zQ+~g?P0jpA`(fX#RI;!+wNevR-$4R5kZLuzkN#q`w&ZTu?5vC4J!*)HGP}o4ukF+@ zRd0x#5QUsBmEQr^!c|;wT%9SY#&DO#VS=_gIZeQYhLkENIi+QvVEA5Yi^aSyr~%B6 zNf~;qW*y}ik42&t!nHGwq4oMFK}OvmLq2sl`&X8<7jiu7Uygw>?|kI56kJU)?I>FL zInjbv#m4Laxz+(p$nS!FmNNCAdckbC*BzWaxgeWUdCFHrz9pM7gSwHS#qr z=|hjg4-+_+!IGqU_4m0_LWG3!MIA;V&|P$49>-rUnHQy)m?^UZONIt8^I{6!23B@t z4Z?;Kh&sPzZC_}C+WjkiKLyMPVbN$uQ{brdj{1!z=%?h=UXCXXt4L!;l}mE`*CnF< zJrU2L_;Xcdi!q@ydyy?qoPbQZWsMk=q{yFj7IoATeIwq4Uu<`{E=nUM;#UoOTNNpq zC67%%Ys3WJZ6lfE*ALHgpLtFxMum?|up9UT3sC#}b6RP3!mRvzEzjf1m1)nQG@m}2 zeMC|F-WnQriNqRQZTc2D>AhEJ3TW%E#ndV4>F{R6FqZQw>L~VoDmtb!m&BUV=bd2d zc`N{c{z&&!I(rZG zp547LnrKe2#jd*icUTO}o2EK?>|Cz~{ese-gFiY&-Evi^65r8{HfzGYCyXiIZBwok*Il(@8nHLHw~gx->~Um=$o{?owau`)+_y#KOMM7fFmPa9 z)s}~Uo&khxLm8wbUMvA18ragI?DHf<+j`p{OO6PcBa2ry(b${zyV3;uUj{-^txT(l 
zh*D-v2+S_GoW)^^;B<=m4tSXp`G+5Rak=1_NcjsPMrOg$rqxpEV@nR+=1(7L%$#v_ zWKsA??g(mi|9v|RzO_w@_a+#TN_XXK`-f5>kjyt4Y$E+#!@p58t9~MyTTz>8!m@GD zFgZuU##at}{01_YUeWuMjME<8V$kq?LiyeFRZw?Pxt*e$8AA@R@Sg9=7_Kry<0rZY zU7B?ND*7ot2K2F3NB?4dSL7)Hsg&t%a_9${ZOJmRQT8X%fayvq1`HUtuCy5oZSGR7 zCwoOfm}A<0jFh%c@ewW_;fh$XB^$2wc2hFuKfK&n7vzhyS8I&!xP^Tsr3<-q{=HWX zy^n6D#8;N%m7>sGR*#PrD0G*bW)Rs+PKJ(1sZFWnL$v;wOwh)|NUf9{Co;2kr?J`+ z*FC1aoa0ar`*;bn`5WC-dW7O(N;Qtoox@==t?7|lUZ5Ji_vu$|vk_ovX=$^f%x>k- z@t1vbP`3nvs*8$_^KferwvFgJlrlFgywwg-n7VlkglQYq!PmZ4>CjFW)S0O}?l59* z+IBsNqUl=r@Rbr4n8gd*e+lxuIrGe@IeXii?TIOF;WLTKx6T~FSdtuqyCxb6h}0+R zGA+n-W^QHd=;E(V4HA8*+x#RX2ujR2Pep4nsTT&gfH8)(d7Iu1A#Uo#i!4;1M{Xb^ zgNQh4M`+*2E+rqShji}5OIJsS)41ax-_Xbi4E{PYZ{M8F(G)hxDnl{s;NT#p;HaHx z^+UMn`*&hl{aPIq3!~gdPmx%Vi>kxg>Qeo}g8e&2wFQHZRm^!x(75ELL$}F&SfK;< zjwoNxo}q{B0O8Y={dn3z5!f$d`rt!+TTc5>5MR)2R1xoHY%x1PyD->=1UCwans}_x z1#RaE+043cNtYBhNK352;(+kFF8R~a(d(WGfP=0REeb)@?z*abCtmG{b$5NIGMAB& z;nael*Ubi2JcjX_1mHENmc_-z0(Xtnnoo`%?Q6}=%@ag{c%lWWg@5ne>lT$#%jQqA z>DLq0s|GR;UhadRH@Tj-xfT`|ZLF=W-%hSNwVxD6i@QCZw1`H0(W!MB78cZUu3yQA z`~I}uw^JjVM8~|aB`n&Y%}&Jma_(S+6+cV%l}%Mo=ck-S>bRNao3-y-YG=sz`2*Kq zN$H7^quTFqX}NJ3{_=4~USW}hLi5Qncaf)rl{n+G7)uC{vyzRN(W>i)NUNd-eZrvs zg1T2vVuz-rJrA%2hFo^}#wVuQyxP}Jyyo~-KAq+t{e9eQ=ehBFzyAj?W8eH^&8d1m zb|)LEqpLez_}+hif-K?BJ-jIRxZy=uLLzu0@bABoq^FVfPXBviSbCMVV|P(S=b8QL zjdjK|3)B@2e=K>cJ}Uqc!<9ig!r-d6nTq`LLmz3I_3wC2MXls9;%#;DJ{nNMuO4+H zf*O3jS#NB`p2$$14Q zM8k5kzQ`NA%yar-XJN5(o6sF2<{w!GS}3<~2>YM^zDQ##^n0iizdC&RTbd;P<1zE2es|GdKZJU2PIweRnj$&( zgqDRfcjne_{cAjo2jp=vVupqNH7q(Fm#B{!H_Its{CK~+d)?GSsyvxWd>N)Um~5KR z>DoaxogF)%|ceA9GIg^btmoabt)5^CQm?bkGwxLaCl}cLre7^ zi1@5o#m>oDC?5E5yxe4P6pn${bumOfoJ4ISuzqwyOiX;c*6F?a{nKW`Pge>y6VSM@ z_f{X;tp>t~gF4Dq>s0>>dU?9PdbyW)f!77yR0qEVzvSQr-}Spv47)nUr1R{Smjg|P zxzR&q9t@;SorgCFqtdHzjf2{fHPqPARY_2D)#Du4!zuSa--`QYLY4V{u_ac2#UH#tXz3H9i0Q^X--NwJvtR+ha+bRziC ziq(RffY&L7uISP2BeW7=E95H_TVh;SkuDPWaE(}bJZkZ$pGsT>hzFj?;d4Z{7=0Q^ zp{re3!4N+WHE81D<9jpQ@R`##Qm#nIvwZ##$=s-lo00`g0Fz{?z1ZB6d0Fg!c?o(r zFP?r{Q`-o*nl$Q`bx7bJ2(?4T@Y15g*$B*r_I%YoIe{#nc|>$Oa*B8od*l?4@4o-4 zi*EHVK)Rx2vU_KVO^IE5qHGgLAjT`%V_`<|n8Bqq#)O(O6p-7CAuSB2VeXVJTpl~h zlbS4${*;+hKS&BdR7dv){H$%Q_&38MABu$Q_P}5qiHYycd_|$(pQQ$ii6RCIp6M$H ztg*52a^Q{Hgkw%SywKm;#zwdz3~mwM=5tZ9@MA-*z&!&ZONb3OYFVuvXyDmLY+(1x zWB1E%!J>Z{FPqc>8yoj}i2u#oTl_tEh;GDXx2Ytg!p~i-Q$B>#g{<%TEw_|Xmsz@o z_Lbn|?O4NDdSUo?Zifr>KvV2~lJfUny-GUFnG&@4u6usTzub8kF@8ETH)|&I0i-l={vZ?zb!B(3a*?WkjJ41Af$~AFO^==x|Syj z-~XRDfxg4?^RH8{mp?f+g}(baI@N>QJk@ixI&nviqaY(8|I^j`@yFW}MC%_IgxTj0 ztW3PKSr2;r1kcx$V;OPJf+u1&O%V6e8yGrd%$_*GGZu~islD8{Xy!>=j?f_#CPf0A z2!OyeTXU|;+)=-p2Ud0^N>)hupN72XRCVvRzll`KKhg4vwutakloHj=&lqm~_1S-T zlzz6#uA%Et?Q-S*26j{oGd;9q#%7;z!vST+YntPq-uN9)$qvvfE=4wVbrsTMU0rF) z;!?4W^JZ2Gqg%MTrgX|@KqA-r5W{NY!NJZB2w8Ygd%0Faj0k-UT(%aTKr+l#0AR+c#l&|nW8*GigL^38 zJe+zL=#dUyFD^}Jpxfj8D%c7Z_{mJ(mK#mf!q@g;z`u$`|^z8V?@95~LXD_q$ z9J3?|lWP|Tj0%QnQw;kSQVT!aIzPWWmnKd`OFWrL)@P3YC4Hj{IL>YjpQMI*qYcs3 zlgFSkFI#hx$W8BM<3#<-v2gC*6|zcmRA5}!C@T;*xEpBjm7v`>RX=8==osLZ(aH>9 zUVB$wvV{3#Z1d7t$guVui7(>@^1{Q|w9(d@!KzbBT&#fcyVp#NEJ2E?zm7bIdIkq& z%P|4?!#t1v_d_@TySksoy91u@;Rutt$)gZmUg*m3l`bJYj8F^7$^0pjE5OXo?hw&= z#eDa4i2}np^K!}? 
z-9HCfeHgu%_ZG9dj6L|8sOU%e3qO_}5hmE;(TzKNsPHmDt@4t~neHQr*Ig^LZiPWT|yDe^WstYlPeZ;^uk2FTrZoI)9L zDj19%`y;hPoe%<*My(n4bU3N5>w37`NqQN0x!q}q!Ivr>YZ6l>IEqoNugY7#_r_C> z`5G-mky_-E_H~nID+g1anX;;(Micj8zlb*zC5y8i5*UR2g4d08(zBI7i`-w&O<`Ve z6<7v^CIlBlw@j1Y)CIsTU*)3rLQ!#%8m#;4^w>VBSEzomC$(VLsGc{_*Y~{~WtuLM zc=_!<9r(0E9em!4p`Y=3H2{YtM6-2h%*f@r(Zf5_n{ITUiG>xw-~3y^QI0_3;Xm&+pdyBAe-GHHc5qns;y1=~N>Q zY|{JNrQ6z7s=#ZFQTwsE>puzNi*M0E-zd6%s^akqJT+ve_wb`}jy76a?S(kW{UbIU zyk=u+=&fdwE0uUXU;dUc?(+3(a{n<*^l+S5F03AsAKM~^xFf|U8L!PKd5%w~l=;1GO^w#CBt@;agP9E!LLEWJYH8q&h9puVGigWmHG;8G z=?ZC76jGrH$jH!!tx2O{A+Mz}?$fueSN)~Rq1K#r(`wUKze(Ni&bOR{uJSN&DSVD< zTNV};+~AjD&Iq^g2!YQIvw{w@GPAOlZ^mwosc89UBNKT9^bsiGGKxn|7UXxkY}0*H z>s%laT$mQyzV?-;T(jz&J_nUsCeuu52raQuITd{GHNc85ayKT;ilva9UAjy<1V-TM z{e#)k;HTl>4#cV=YTn_KQI)y~NZN$a$bm|H5}V_QNILr?C8($DjfwP{k})-JXoE~I zw^LSfn~_Gt2s_y;Aptj~eI38ObBc&8DfQd;>FN-@w~54Gn!^=XXh;UgWkQzBn1FB@ zEEvOjZP_md%hBhJNzm+7?84b%`daI9wL+$@v~7o z`3UUXbwBCc_55&zsJB1&S8)AI;>Gntetw@r>z}scD;D*zsi)lK7bJd;k~IP!kVn@egw$KKm6-b6Ib_d@U~472^Kt7`uD@d5c<79%w#ZmY z9*RyOVvtc9;gRMm7Gjd)P`40iX?34jN|Q>z#UD|!wy6rI8q38~l@!S?i}gZ|wt z`Xfxn#T`=?5G0jeZ4QwQ+0KUQ^&fk+PqoQY-QR`diR)Esr_$gQeKNK9D@{LdTsK@u zKjb?#F5z>&InLGH(bYAz=>9unpPL1iJp(Oa!{PLl@R7={YHXemUUX{Fx3R`mTaOqL z5N^YXIBpG!l-OTbb%1D`Hfj5)>MfCWhZT4qK^j`$N$L=(v^J zD2HgzJ(z*xifSE3Uv%`u8DCJ5d{c!*sjJ8dSVY+`qW(sy2EYB9^VZnJ9GSKsYyLI) zH^R^m$A+8c){x4@qYJcuK}8>%6;ta`9bWN%2Wxmd-Cg{LgnL3!Y^L(tT(%IvV*5y6 zl87D=gApj7wRJJnv3OOwfFPF7ryKv;z%R=s7bb#6$jVMeEPYU@8V%4U(=kw zPNJ36G_$d>0lsTiU3E%{5qf0B<|CcaBxx6(a{=iL2IfbU7C z8V#`Kq^pf`j>g=q5*fvkAZENLlW z$HN~sMB5pY7rz9;e}9${tl&>@7$#4Q>VB#qRTF@de*YM(7`c`H4d# zBQuXbz$!3BIe08DpY_gFDHCl>1H8oR6f%XcBb4*B`wh z7W8zNLMQ)YjT7kli^s1bv2rrt_wpFguy2TyFum4-ytX+oZ?J`)Ya|Xu z+(!wYG~5=E@H8!!tc=(EVoJw4}{vwB&nCk?1uvne^YwOBmU?&A&q#_?d-jLi$yqd5u@ zwbS>Hz&$2aikFAQ?&sT8=OPi$Kih+GCti874hK1cq$PpKK%@leJ0WI`KISW|~Iuz}n<^0V;70eq1$*N$eLEB8{un-^%6Q z);C#}A7PuL`TBiO$f$0S);456EI=_6rvDH3N)r5Kdrz>tHy##x!!ZzW%>DLwnO~(XOuJS` ztE)P@#>31_fmq?t{zMyc-MV1GD>w?bjRxN-vH2c|4|w;mj`V`FK7`lhh0 ztLqsa{0y&9D-uW4DB{qz`ju+O?)MKfk{$NqgnH3z)RSkbDdZm#%Mk|;%ouw*74`rcV)I@PKW z@bPJ3gxuVCKBsc+fP~on&TU;O_yl$OGkWsXVnHU8Y_HI8_Grqv0_rTqFAO{G_w$PG z2JmP=!b+%+BbA3_#@^}L%-(R_Afc11xChceN)4IF<4V%|88 z?BT8$H~_{UxF4J>HV>so9m&7iAf!&LeAhzV)WKr})wi*EncBD)^}juyH4r>fk>Vgm zDz9m)dDUI+qM<7NZNUwdfzOwit!GD!i;n!~=wD5OEd`9`or0E7C2#XB5l7vua9Z=k zN?BS-8tlY!*9;(gFTfv;&B;JUN!c+n);Pd0(N=xYroEx zjiAS>ib4^@o*FuL4yNEiSTYsJHsX0UDTJaG2K=ftGt~r2aFfaKN9#US3Z1+ALJ--) zg^~qmw*KP?T~ff=-yr8&O|=qii?>IkrCe+{prblumF$c9jk6|9ghsk-h+~Sx4nU?M zQhYlmT%G>={hH8I3sDuZsrm)8Nru!xb0qkg?k&l1(*(uaR7qXXI7-4^<8 zH{|y>6a_;qU&!-uJ+SR0{Abmqh5J8R7bsteXs@Lh**H-h8b%jtrm^@)A&;>u)(0d~ z5}_EW8HE^>EP9fL0A5wzGb`Vue`z(JiE&^E|9}aRVA0F5{OZ3w5QCr#M;rH3-ctpl z!^TAk39n&A~443jG_+9;_T0#KL}Av2BOkWw3SA`rM#nBL5GG?)~u-z zct`KT7=HDUawab~@?T^9M|!NJf&7uL4eZIV!1GKAB3T4HUUNce>n*L5kvR8%^ua)p zqTDh_X;J4U{>8CeJ+!VzkRjjwYUgH`CYBmH{d&=ht^|Rfm{p!Lc7za$;I)?8`HYp(=s5!&_8dSqne zbih6z0#H1?x4Cav8QpeLJQ$JxNpwGJ^jexVRsj4s&7C(g%9AI}?zMobXt$T1Ra1|BbX+gbbsEZTe+ zOT`RSk=?`pigv`=kel4zsAp9>>1a$)DBJgH<70NyE7HPW+zL2ht6!%IAf$AOfV=aUH%$5NsTerMZ+H6$5sZG#iAk}~p4O*WGQJwZ0%Ay#qb6sM zT1J)$L&Gv@GN?5uLa9tQFfe$SMS$i!=dK*IM|xG%H>B>;4Bg;0W@xolO5SYDGMC{L z0&sR3Z!9e_k8%azbTdX<;#gVo<6hzGi4u#cR(jiUYT)lTIxbxSs(<-O}Kg17w) zPw8)ziacm&lezoa>pu}X|8DaK4BJpSbvJ@2Vp(59tCyM(i-3KI%(_h)SNbp|phnCh zuB?d{+m)XhZ`HmTmrCpoLFXwEEc9+P4G|#@NE8lD+#&OIir0Rk`CjMM{_(r8tVTK* z=ts7s^pY-GvFyA{me?8`I-5~}9kQM`w$E_>hdQqA??&*;`@g@nsu4n*0imn_*UwjX z9N?{Q=_ry;(O)#9qnOX7Na9-%kSMkdv0%cQo6C$QCJ3z{t6$ae;sHVa=dvBO+jexIR&;i_{pa`|bNeEXeya;*imI}A 
zhgYJQXtIB4)53m$)c1S~mqSxnb}Rji>LlOBY^D@RFUNN;8Z6qsYgot^^S%5}0X=D6 zMCfV=4HO9T#RW|83!NkizshU8%SEWCw*1*lF5ghP6H)1t|8fkv4%moHyA$bISwb3D z>}TlmmYP=&Z-gku4E*O*1m9oJ$S5f(ZLgViw?a=*kcgsw+-ZEifCwvfh1^Ud)w-%W ze5_lnJv(KYvqs?r^cZqmSa1D@TnRjYIkZ#h_{An-|V^1d2S1hN%yJF2hGsQA$692AO zox3O34d4_OG3@hlQ9V{P$o>Q6GtH*6VWnH+|#P}jOe|EG)L4Zow>xe7ItR+p{c zZSH{2>E=^_-XCo}Xp!Q&RDpmr^*4-yk$FrsVPcNy^ay@6@_{u&$GWz89ui5$3(y7? zSm`@{YhEDJs?rYO`}5JvaJx`4zzc~+F;y+_j}~Ggisr?o<85h%UT;-TxSd}~Zo7!z z&pzNZeP+}k7PkhvO+XsTVt>XY^P)qRl)la>~ z4VV#56|*uRTLo72y zp#V1rcN>njiG91mhSyllZu(niX6`tAAh=P*oK#UVs#Jxx=~Yz&iDtG0@i&PPZov+AZ`?>JV^(68$2+$Yu8o5nb_3? zpL`;Pu;2B;*ci6E(on!YJb;;O@2*3nNP1xIjhbkDQ(|m=4^zhdE=}Y}h~}6`7JbZU zG89DojFh2i_)ALzuM# zMAxSXf-no5xO^`T`^cLMT>Tb!5JG>fE&8pLxN+gH&$^^|7zd#lK*md@pJ8AvLu0aU+mCZ-6W`;qI@>zH#ZObh* zZ!9udl&yra6g21W#ZjL*$f>`sD*oxXHiJ+gG0Nt{YXHI-I_Z9>?tZv+sLOxve~TFS zoyODm;LMsL0u5LuB4se`7ilJkWG`u$Zy?@#%-|-qr&8$R@=&`FB{(?X+P$#|Xmi&9 z+pTzhrVN=XaI25B{{<5jy_lF#7BoC7H*kQauOyLpNWkS?bv9}hnxD-=P6wskPP@FXKK>|aVj8(zisR@Hw; zBn9CZo+8*2lUkwJe{Z%7$#HRn?H9N=H8uU@E)08K99opYwQ?5LTkW0AhTWGi`ylCI7AVnx!po#!F^QmtElHEdM|AHhEtJy^r(9!sARV$iW zTkn>qgS-%K7o-X?Z|fFUPrj2U5>T?8h;G1Q!bU95v74~Jm_-swIee_r{`qZuSM8T} z!;?ia9~6muk0{3AUIfWVEjrTX`?|^LVdH`uExn>_Ch7zX28*T+%4UEHDYCEPe3hWR zzQk{(FN$9Q2;kw?xGz*<3JgaNt~gdP*NRxBy2WSzYs}sCBxOLi`JRg`S*v4cXt;C9 ziGfEIfKZ4xe0CBLrig2ct;45u(n}hk z!|LYmUNgsOkrNXJ;urJwALxu*U65i34}QZiaUasbifVh+c1NN4jjujgB(hW5?jkfC zn`IkvUBcE=zg5z-{kF@)=6pc?w{O>nYQedAG2G@ZkBwiQ-{~04r&gW?ETL$zndNQH z<}D_eBy;VA%-neX@pw4KPQ{3{HA$_ebr~wD6SoV03VJrG(jMq#B!7SFm~M<+8wV-{ zgi0cT5xysUe_#B5RBdW<^5RrT?Mr%0dBN;o^Q- z8G0;(oHOj>2#Lk{2|*`UJUQw&MSwQjU+21xy9f@hA&vQ-)^D~Cc9Bor!2jQj})ZWe}1zoTEpY7es*1XST zWPHg)eV#F9Kxf4Sn>Vd%_iKuUw~CnQMC4b<;Ir zg#Jy4_k(CEq>ZI4cg`n99>?m;+n_r>h_ngy{fN9ZeBZH96ka04Sq?c%bUxtS>`aP8 z2Yk2-co)Ajgmy;xMV4RkE>-O{&~vIALT2$XegGhT3rr&=+a~Os?7@Pu#K^p?XpD@y&{sVHMWti@A&lia=T1z zh`Bk=;@GJ;_@40`;!#Nm z5V%u(DOCj?_>z2#^X9Fb>}6!>p!sot2Is4i8N>t+-p}806I1?NJ^yE7VnXeCS8d&U zBM@;gE{RcOdTw$Ow57XKl}!1v#dIRpJB;2Rm$I1$S~40x&iB^Ia6`ni`4A~Rgj{{q z6>C_z=JYa+2;>wY%GImAv$3)7{Uu1vXb(TVl9a8*T*vGzQ9D{85HpJxT?}KN+^`_= zqM04=*|^T6!^hRjD^u1427*6;2NK+y*b)0DH^3Ei5vL}8znjBn13<***x*y8rPi7; zB?p;)eB->Gep3c3f)fA7(OJe-*)(DNkkZ}Fp*yAI0qK?y=`JaeF6r*>lm-DQDd|R1 zK)M_0l!kYC&j-KAPdWF!J3DjD|C-f(e>R~rn_W6?8TpP^RjvcqeX8s?#z zv->FgUT4{HnC<@hBsC5F?Cfov9rkxx-!+y;?f?|#ck~oL#tT1c^#4t)aC9;TU5kNr zjl2nrwmbujC?gWwuh#WL#Q!QPWhHqRNwl+JAn&Y<$a5xK)Eg5~GgG3qV@VX#AE;Kvs0!Zy6ApybD)$^{Qeye+YZR_fZo65&_ zh&9uIsGoXa!*~CX)||e%Pc#L|=Ddh)AT}({&DpnmQY4Q3J}(hI&7HQM7G!|= zwDw7TB+G-m7ZLmC-d2$ouYLP1E|0F$@+uST^?c2q6cp&H!jmlk{bk zOjS&0j?#Bv5N;bcYt9)p+29c|>0|G!;8`O=DqRXDfD>F1DGPJO$X={jcud(y`Tp-z z$^BzHSDRMv3JP{d3V}&J?pPv7-uVvZLuX zgU@z$$FshVv!Ks-_4#SE!(%g=U#D_rV*{FNnq!PJ04l(&+*|~|qs2yC)w+2z<^INw z$D;v?FNc+lD?+|cUPWqh%=j2m&SRtF+k`m5QMhrM)DQbTma)M1Z3ufoyg zSl|!_$$u%XbvMg~FS9=l{MQFp?GM`=9sd?=KyoF|R6NDH`73R!2S=KLez<#)a)Bp! zk%xb?fc{;Q45jyF`~7jtQNtqls;V&n$E-3aM7)~nC#XYl-em9I0%>#MD7wBrn#23A z!b^SRvX58Iz&g(P-ut5%Oi?rRr`BJ6+iaoSE1-H&n!K08(FYBQG$YE=934qTCqk+C zm=77nHQ{vzxwqpWGiYgQGGA@?9<|_6YH_HjW99EZZ6;sr3PqP#Gp+nFL&p29e#by* zqX_o{--S{?KzPBk!lV#(-~Urq_i;SpzbMU-yV^q&1hf{exN+BwPk?68*43?>aa?2g zQhlp=v>0EfsxpRl8wKSe;BKDKIa#PbVDQ;s5V`uLyy85iByDA`Sm9E&xUIA$Anf4{ zB^mwFLh*I2Gp~%T@U12?bvt$b*8<`1X4Fn%u66>jXzx=dY`RY>IYI`vyFY&X2(*e? 
z)-Rx`3{>w=^lkEGI+gw0w7Yla?QmqL zn=q*Acz!%t{poT**nWyI6Rmq-ql#y)EiY;FXn-e0L<%ghQkk%(iW~>lmGQrI%GHy0 zn>WP=(no6*Ws(JAKuUKX{n@KPt?Y2M!_WXlSm=!6S6Q4?3KmxloI`PYAy$pZFQT!3 z|DvO#O+geDa^5>#?*aNDj%8kwIDD5-!@CP1PHntRk7I>#$N$*AYmfFjv1bR1Y^WJX za?`mlm?W5aF8p-{;P1x9#!=(y6Xx@}DCot#+TA6X{9#l1fnNT;O};X|Tmdc+&54Go&2Qwl#YB9qHz=k3ogvdF3(&B4f{s<2AtEKebd_iX0aF`1k7Udd}!(}V_1D=@Aeu>Zi_m<;@cJD6UNU=d=^2EQWEUtN+`x?q(OzIEWmH+%)98o{V}(=8&+chm<&O_n z>Lc1j!se=D`rJ8DAd?G_OzDCeo7wMnjl)W4^A6ob)Rm**gK2|02V^9;T$$!Sj14iW zJlYI)pQHm602}p^j{!C=mQ2v`?$WB?9x~{?ENmL(L|iB4fEv0`bwO9(JovrvZyhJy zKL*?%LqBr=$LLxuDWASarLM6`4uTwU7Fy8Pi8Kta6p zavol=bbYsF7r=nLG+ZCTPJpnKTn7GVo_I7-#_e)W4px)Oeo zg$%~SG>J<-vP$#<xuyxG+qaDCi&(v)!|%8BgHE{e%-S-pA_ZcKn7>|h&GD6Xb!tcyW6PI; z6780Vf*uAmH?Ei6Rz+_ap3OiQ8(vJ&cFJ|O?9lzH0&9ND={HQy(hD!S#mIs-;yPr= zC#6oHj)~nAFTQxx|9{|#~3Wj%+i=nIgt}S|q*`I8&zkza6 zAcihu!rw8*@fG0x{$x8z25aSE;rmVgTs^U@w2`A5=Kh%6OAFhH%F$X4Bmj##IZ5Es zU|Fu#Po*=%ST?ExtviRY`r480W^TlAUXz^Sg98?D?0T&irM_rO{HtD0pFTwwnKK1< z-PP6GI;_Em4ojNhPXyq53Qu7XIzmaD}_Nv{ghApXe_#$_uv zlIZ^LnXW$n>T)0Z3?KR|>WHPV=a(F?tA_vEo+VEEZN;wk+uZm>>rI22bwyXUv>K`c zW#IxGyaAcmCKET`?cHFQR0WxnQ4GwEn)1@O0%y}weF>~GOvs~;ua%XY&8V2Z_Jm?o zE$d>9=>W&$B*)Qw-EPOz?DO3;0Qo_`7Bo#kB{AiDr~Hy+WOm$w&d=@63)dl;1qw9J zOUAk|(4MAve#wY|u#l3H5;gfkoV^4Cuk&BCtHQw2F{a;I-QJEdwNdv-Y3>F zYElOb3N1!gz2@E0U^I|mP-57I0DS>gP5u=)bVYBvFpn4NZ9u8T72V9TIIZ3Ex_`@x zL@J(3E~EIc)jSafyM7VyEKXprfj-Fow5H)F7ois`Qz3Wzo$XHM-HzCiZi85mm$;db z#ove8u@brf4qb*<_|l4e8?dyi5j6%?*Y}V0^{-R%z%g}Y)A7$n^b}6Cvmbl=;#Km@ ztgh$Uc6TQli>v697=n^FJi9j zbEdKX0SFqLKBkKIFCys)5H(vK|1J7%^7|ARR5|@u3HJ&kOUNanhvDPLOr6d9)#rBL zda|Fhs((Jkd_H|y+52}3plCdsDuF$?w1+4AAO3GHtfRF zmFj1NEDVxx1*7GyK0&&KY~E81y{68OK!YBy)ZO<`KArv}fAjBT)pvm0jmQ+FrYm53 z%^G}(=J|Xs+^C&$r(qFATJnzE_pFZ)G!v)6InKes;R^tB-u_kq{e}W_h3}K_HL;}O zl{6aY#zuOnBieWUSCqw3PZRv*x2_@`Q6LTP;Jtg#a-&$1E>Y8WuU{0qdbWzTQL3H= zvmbZoUo=tQhuG({Y`2xN=)W#4$=3KP=8rjC{X4f;8qi(GE=kp-TZ*{!lLnL=gA{?4 zH~F0IF!r*2E@Q###0cLJiOK`ElK)bro|Vg8&sJMXSmtkjWTJdH^csBEvD$GpqkHo6 z;5&%6&%iEG)%H)PeA>jPPYWpD6E9H)Xr5Ww+4}nW(>kA5+uNqCy)QCXLBky&Q2?v8 zOH2ICT)$a|7SVsKmG$h+O;kp9=viaKrjH6 z;H=NltjJZFGN=WBH^M1dhwb`Iw=`*#yK-gIE2kVjaIYzZ7<&FHe`%rnig1V-0}@uw zf~QikQwViO&?SfXV|dt}6@?ANKp^44#;}z4P(&MA$5e5nE@k4}qbx5f0XtPFym*Z^ z8a33Bey6;Pfsq4F#;W8meSec3RRaTRnuV#hIgpfP+*ozu+?37`At?muJx~hu=p?2 z6|66Hmaw_;+IJ;AuWhDy^|iGpbe_8?AG)qZ@Ad#X7{vt^0a!W9XBVEAt-uu+C{>q$Js>1vaj-ti7=rwt41;E56PzuQ4g&4)a_Gw zG8t8MC1z5a1NpKqCvDd+)!Yeq%hbfc^fyzQ*_5#RxTGEJcjp-u)5^*@l6}Lt2Zd>q zg?H+?>N1A+5Ks94yM6IDw1a|ZNnu1ogv#G(OicSPo8+E-1@a`!Gyl~x|5ljnb=;|| zn9(V5fEPQA5r{Kjmxl_b9C)96Ssf640_k}yPl>sH;h`b@mLOwhiU8RC0ENxf)c@r0w~GTJ7N+Q>eaV!^ z3o!}mHl7)7E62aww_2p}a zXSffI;@})yqzy4S4}V%y;)57@qq9}MUMy`n8hdzncyY)09-6-BkkX%QXQC5n72b~p z2jpuiWbJC>_01T#nSWATaxsrzT(_Ng1J3N_>;re-h66{`$@62!Gw6n~xGpjm{apSo%bu0_3(TrArAC>+$!lbmLq#~%l4&<%br)xv zpb}xjW-PTEE|C?MkY{p*EjXd4AsxVr{;Zr(h%?aLDEe?Jk$blao6YC^7@neQ62f`NZo*}y z?pMrZ!N!7t$S&}$Uc~E$b9Wm>H>G@nFdMomXyxtw2y&ytr^kEJ=>C_Q(QdJ!Qm4{q zi-rZ;eh}Xiw!C{sB3#I2?-lN;{3sDNK98c?}*dzPm z7ybc}NV3D_Ug|GdrAy8=D3qV4JMJ`}|A8|fIAyYOxkW|CuTthEXn0B5VAK%Ioh2!a z@rtmG*?38Y`;Aqw6zGe+1E@0#w9&C!Le5?b2~@CUO@o3G9PXID=ZU#nB;Hc%c$sx! 
z@G547%`E70KShk~tBJbD`5338!=y5$ih?nr5F9nJ7@2M!JXnA7@9xqRRMi&A74PAC zF(jB|TIC%rrDISL+cwM8%DlE&41rW9z_7H_<889dvx!Dn41}VN&6pS82r#-RdtH?q zR?Z%X-q(o&|7|*G+;{`63BUq?jZz1WaSns6y?}Qmpl3Y((l|bRN@m6~$yqOr~^xDRcvl|(Bi%A>nSR?ZbHEdR##{YXh z@O`YxS3!ml-fdEC!Eli?SR)4n^Xjm(F9)nVl0HSOB*}_}BrcVD^+O{SSXX3J|5)<+ zJPi08PUSvbg7P$Jatt2~s905(hY_fx>00lQGg`7gkFIDT>zzyqMS4#Z78pd0vQe7l zf^+$*r&Pp>FYiIv)BNu^&V8R)sfMZ`I+&b!;@T4BXpqDl8zu4>hdJ|e8#0c#Va8)P~>VHEbOSq86jYV+ji!Z9Q zQ~bd!3LvX`g9!G*W`RfTF{|(9um>HU+vFNMFM`)1CL*%Z-0ix~s<#c!z}05KCbF zWD(-&^n+3P4co$dqgC_GnYEw$qab7JDou!;5lsph(~uqmgq71ZF$)qj z;>OnTZ-e~JP+NN)Z4#A`-3t`AFBZXhBKnfVY5jk+)KJ1Op7u*#v|N`rlS>dtlK`%+ zpFng-SOP5=>N#rRv`X%_1oBVtG%{)YF z37Hv34x(&8CiB;^-m`RNEj2nDA*Td<|Ccz{qt*F!&B+Q>yjUD7Ocou7ZJ2DYA#uC6 z=jZ3Zm40_2dhIkkJS?YH&o`yBEm-1;Z-)mNyl98_c~DaGMu?&-QGtP7GxA3oOn5Kv zFTP7ASdQJq5rc9rI>)^2mHf6w5IJa0H@~Z3k&;`-@LU|;&< z4VJjJ4!=h{78_Hwv!$WAnw_dxSK{w-Li&y2jzf8@W%t?d`e4=UDL?*E2E?3hzcI0{ z&8FTr^@$1HTBb113Wq4%nf8OOsA=@M8)`L~(t(}!P^3Uq`7aW`>0dn{mLiv7(;a^c z==^#5@017K2|~?fE1huHFJA^|Mq9p3f&aT!QVl%AxuRnW3eSfu#WN4C?tn|p=b+K& z0PrfH=WZe0yPta=E{}5z?FiV#eE3-CavXVh+NY=T9c#HcE4WG$ci;@HZy;kmK3#YC zf+y4nfH=U%3D}Z)6XDJlyq3x%hW*MYua`qy6g|l|40&sc{YpoG@6&oQbBbq{Vd!lg z^TsDN2RnzFFYMK<{}J5MVqs6YGuDpw11LHFS8l;a*30Kae%$IS3@^#(e{x~0_?qR@ z;P`ti@V$%``RBdL4@L|~ag{GX;fk3WD~Bc5Q+FdGcs+mgQ;ad$H_sZ{aGEj{dJSXV zzEATbckUvy*1_z#;+39I7K!!Vn7b-kBlQ?6eLE;7Oy&zFBQ`F6Q)f<=-(KqqeCcJz zQv5eG5|qvf&-LG4Ao`6{`2vKwD$z?AqQkyhEqgdUqlQ#t?M)H z69xnJL8lp%UOqSLp(m~P#~^Ci&exi|x;AK(b;=Sl#z%G2{j6}cH{JeZbjy;{|DBs@pW@*evNh{dh|Cm?#**p5QwdtWxWm*4fK}0F2PPoHoQ%4 z`*K|jJg0%x+8EvDID)@e&iL{BQyxwVw50+as6X(Z%Hoh~u#<0L=mL_6)s9%nbjb*h!PcU~QB_+(jn$l*Mx; zncdO*e)EO|W>!2lzL0|3dJe~KVYcxLz#i&Ux?YU(i{7+RE;N2#1u9If=>UfLB~>>Q z2kOsRSa^M@K{t_pf$SJ_z(3s0AO9|psuxBU6b#`$!)Qv#R5y5;o z<&JzZ!SG@FUfv zd1r*<#Q69(z%p)ft##@FqaTkz({$wI`RPRT>d$)|lCJvHW?US!y3120EX!}Ky|oKq zN*BE=+c{~?u7;zhR;0yo7Y>ut8k=szpMxt9_Jn~M36u&KK!dJ#aX`ifEl;RL1LxwP z6-Mfv4g`a=U)$Fra|Vgw~;b=|RP<>;N1q5#57?L_Dkn}0Z&maXbT$=5TgUaQ_p!=FuY_xT{_sctm&vr)l+aUWwj*c+Fb~QJVhvJ3o6m5g ztHh7JApbxH4-RJv_ci|)*)&DhPFqV02uON4lZ@yPkA*3nI@}~f)*m<>Z!B5&cD*vW zm5P_`BImVDTE$fbmm*b&a^N@6QbR*x+0P&;>&<`9fHDenR^Kbo>B-1U|LfXP6E0w# z9A=wi!;`dk*GSBs2Nw(LS!F>)BPThOrkaOGZ}fNXND<2ym<)^J&^V+)v&eB}Ck z6Z?F#xccN;1r7%5L*XRMLj|Lxlq^R+jPOT2b%7YU?ys>qvJo;RX+Z!7Cr_GYp z0Mej>H}p|3EIXtC;bmR{AAL=8BrXo^JBhgp+7<^0!D8a|(V`pBHkSa00;1hY!w+6j z(T?hBqX>)N5&9J*$uUsK;N2r*1We8B!3So1^_3OeLf z8Ok)DNY!DpxtJXQlop*<0L2ap2+)!Oe>yoiUFncRYK1ow6b?UaWQB?a#Fsb>9x=l} zLae_{>_qtJbH!=?sjMPI7h#R+06p_fBBJxF1DH;oOxp0Kp>CzTPE0hKQ1`>sb$%!( z4V1@#amZZKwr;ImBfowk>}o57>c7&nU%nTvwK{`+?<>aTHjOUul(vLIqOhO&i>L3do6j!jO_+oFsCHMmlS=DcGrIYJceLwQKlI4#?F@iAn#Y#ZW4PR@Rta>jGj9<$?1{KSq zxpMgE+W_%As=Xfz3GJ|AmXdfdKuJUYp4L&hEfQ(Byl9292vkHGE@VpD2rDyoHU z)%e(003o;@1u2cNO*U%S30VVIDade(T-}BP)r>riAoBh#pQ>vdLY!;MC|y-yHBT&$ z>u|B`L}Im#Gr88g_2k_(rzeq!{OxUXT!t;1)gZNNbjVI;x63@nFR^OlH3}X&?Kgr+ zwbwH`BxmTHQOhpSTHXd+;jS&jimfAdC>op}mj#&2)lAs#vbCo>ZJkVNAK~^YM1n>D z%6+CZul>y5-omdd!<;+>ezh1 zS}&|ee7k!{H?fuxa1kw=RmCLJP6>-P`|0S)6Qz2}dN(<%d|Hm~NX0zctk8(ae0W`% zj+{vN#5^B3Kkd91GlW4&>5oY(XHu7>oWa+R=ubCmR@1)NiIi%z0_@uJ+_c5y{Ccyv z0h4@D?*!C1xPwj!=UWxtu?Qb?qi^54(aUv9D}2v=Fnk*J^|;+mCC0p+n+WsnyG2sz z*FwG^U;r-4Uk~I7g$% zF6d8<3Y0$m^TQVPH;cGPkZv(qU)H|<~E zhRayQok?n{SV2B#Ckp2`CVl@4*mE~HI>uh(7R{HSbUPr_7dC$FWuTQ)1N7t79au}0 z;MB~1<2E)n1|+@c#gBkcLan2dfT}SUX-yWy-^kch{tHRxLmL;0H~^qrh%w;zx>9#Js-1P~@@2`0B3H{)rQ= zYAL4NxUcm-F9SqCv1(^)Tb&vPt=J0U{EPzSbKqJb%lflSM$ zt-YN+JZ?!3%rIkFt;#w{{;kSS{WhCg&k2TD5(z{Qx3;*?lf2oPnPBXdv&iSDvhg|N zXTCt=1=Og|VX6s490K?a=5~C@ac>4{>MCl&iW=XtNp?}5Q!7=Vg^X`biAcdaj!|1n 
zGW3eSOOvhk$N^m$P=&utMiF!hd2CFU?_->9OnJ?RROnsll2WV}TRd;O;sSISaK8Kl zcp3PHFNV$nE`I*T>grCM0y1yhKFHgdcoz}~L_dYvk6mK~*4M(=-*0YBCh(ff^X!zI z;{sm(wLa8QV6U90!a95(-7^LnQ-wfR&tXP}d`eK@-Lhx}h+o!DPBFaqvQ}>5=Uf^` zjqP|Xk_6Q@ellzOf+KtsJ~zgawpTyvb=Zzv1c{`1?Q8JPgPc~sfU6XML*K8W-XY7r zz_eKbAw3ETbrH%23~6pXxaP+$LH+#U9J=1^W@Z&MtDsNc+`Po;Cq|nx0)Zp6Y+2Ub46kb zC4ZA>Q+dCfW4SXXI3@>h&L|&65`lF^nUKw&6;&$3v8J^}0RhqS=)whdzs=lDx~}C8 zo(;OF2!2Y64H&RJg^nyN38Kf%8jC)H~Cdh0@aZp|){B*MkWL>nytU6X2 z8|_y+?FowA(OiIcxwvVtTOK+R2I(J2Z4VDC%n){@0%H$Ug-I*Ts$h&^F$oD(_YQ+$ zT(kRsZBM<~7ypC5j1m%P{~<0!X=Q4R z+FS81a;!~qW+9T0!)G1UPymb{0j+b)mveqGB?Sx)qgMZhk1!~hIo9~*L;q>kg>YH%pc1Ikxz<)KPmD}(jKBy z(|%|4=kDP*C@SOR&Gg5NTgx$RGdLjyqhi*3Lc3T#)5qNQ0@@plqO7j21}_z8J)aL| zDuFm&I&D4T4n`c+?@X=-LdC?%`EHL-xWlfxD47fryAWDY)x^{ekJb@5yT40?eq4l+ zQ35u$1KnIkO5QPJE!IT07Jb(S2Nm?2_B_0~3oUcDX*Sps=Ok$ry}eYoXj8{|_x^Po zNoUZB2Ev#KF!0DFjPPXwUi#Ue^2Q$o5w^M)56VImF3mPW&RcSz)d=EB7sByN6e->_~M7JKzAw=gt317WI-0$gu`r>);H_- z?YoPdD1Ax7;bi!c;+IB>NYNjr`Y7`n`g4A#(l$MN z8B`xVn4(@F;JhY@247A-OUn>{E z2sP%jaAM|hR$+zjyNywXkP}W!-_p2-CJgf~F)_LR;zPhN+46Mzc=9lIqSs_E=4^3V;|3tRY3r+(XY#nO z31bA(wSFR9_(>^9?@Fjmb@?njBADzo$B3iKI1nI;%ghM9jfu>bXZpTP>hlZZrG54A zqK5vc774u8LzyD@;&|MWRZ!La*_%uRf`@Qfe|+hg zUQej0iQh>I0D}4|?@t~c&GWWC&mdI4uq0r;tUet99rdWOeg>>+8Bl|*j3Sh zSn`roYADz!&D7pJ0>Gcx^u_M7&Eb0srZ0#%Y=C+}&rph&LnFu7Q@; zT=OP9MY`!>(v01d7>#M{V~cA_RJM?+fhRd{1Im9st=XD0?iNF&Lc3WkCf^E`kf9== z_jhq|@qC#3SO;<&_M8!nJX*O8D4j~N*qF8}re9+`M4*Vb^Q$zPUNDAN^B?P>tFS>m z!VorfIyerAcqpQIa~UmEDK}~@nM>V@$> zE1vAMJH--F4;|%Lj)>F4UQK1Bf;`h`$)KdiLWewVMW?w_{jhL^X~l1o4;0xbiHh1& zON|yjul&oZ_yq*T8Wjg+@Ls^0`WRLBkqeC~ z(H=vxLY*1R%xQdStbu*#yl^C6B<^a0I(&ddKhvAx+{*s1$RNtsNM81NMnFI|lW_(3SKyLs##&q*{JcUp2A% z47ZsW`~;87ni){ZgTj&d(ByZPcfCsmxHe@_eil3kxJBIR!iH64}nO7_3Vcj%POsqS> zJI5(DxF(PRWma^bTQ3|5U5s=2jR0`AzimCD5sIZ+LYNEf=Oq>m!KNgUW?bgSL*Dfxp-`VGKAgjAYfi2&&Au~ru#1JY80W(6x1 zyvj7V+LU?-Jq$PcQ^wt4+bpFH_;-%_Sii_9RJH?qP|Mc`-*;chNJRzqB}xBG_mj~t ziijZV_V?KSB%-1;F^f#z(ntN2)O4*y#(0E>;6b=$$|qh{$lHy_q#yNf=^M4UryN?X)>H6XAWeZFTdzEf9&d{go*i!_1rA zpW2~~@G#BXA!KD9`=5Pg%G z5|j`p3VqjfiLiaLzP{IOTWz!+?9LgS`^@^KqleBvW~AkCSj@dw0}ja8yZj}pIG)SB zoJPg+SfTE_e-J4KhZp#9)ua409T8t(cGg!LXl-Q9#19pYf7K@yAfo zAyj12mUAL!wiH*+io2mIoCZ}Ky0mv^fn?Y+HfMD|X4uD*o8b-gezqMXLwgESvz;F$ z$}(X0T^dH-#R9{8=8w!)RI?e)+(vr!qPUsLg=0KWR_I z4X5!Z_2YN{O7q9u!Z~?N>+sp9Z76tg>0zW zbWkD9!5$qO4Ax!!xI@eE3X=PPsXXUxM%3iSf=-oJJzD>f>=RN@r2MEq;RZJ)G-uIK z(h{wx5OWGoK5VF!Qbj;(36cX&aOk~|wL5~^*7!ecMc3fDk-^t9q2j!QA!aAgv01j{ z1#|*b>A(J=+=9fc%c&i7dGX4x)y#>g6-+qDTLFU+!`V^j@)-IAE|e5?Q6mLU>!VoO z`s&<&maemKc|d=n{QVYkZS=>44wakT9R@>a4Axw^bZRi{C~;_I0`)lN-#1V`67&Lf za0ir12Iq~=Al&{}BiG@q>Ns&NcfvYsdCsTacY^D@_zy>#WBz?x0^bsYenKHsCPiw- zhc03T60Lk(nbb(XFjdvl*kq0zM83`Ol=z`iC1Lgysle#Xs4pXR7HM5-J;}ApNs$fc z-RuWaJ{T2D#9g-izNBJh7sfv)NbtcEk$OVA{!dt^cFgpVe4SQefn8=aVoca%gB!9Bt8a1dR2BFJ%erhry#XwN<&*Y}r+wi~8Fx`WQS#++IbsNGURyDKw z7p|dCcK!Z#rU+AjtfcLMjuMe~b ze^CFo_JxW&tuZKd?c47K>~PE;Z0z|p_2jVG-`+jOh@4}p!k$SoRqKRAEePVv#VVnb zRqJa3ZNli`SSeQabmTUl5+ujOfXOI-&;J$0&6qzwaN_|xStO)}i&*uwoPa-klKkV% zp=WZG4U&6(g~6{^*dZU2nht19;KibSifQt0XqKa*)uI~p5@t$#35OZIrf+{zZWJK; zHK%JMz{S&E*VnHw(F=#)wZ7j}<}oog5{~1b3(fQ@1(&&U?)era)uw1#M}>xinN3C@ z1K1?<{ws!)|4KhTOoV)}+!)NhhF_+}+v>)gYBYAfDCW#!^0H9ZOLeg9<%aM1A@N6j z!gPBBr`ySD?`15{i+RyuBSXnIdL3| zUYNdNuzTN+tq-(w@F8n}d0%5P#I!1mg^d%J6!?qn6jg6$&6L7iF%Hx=}eBDiXZl#VD+9QV@56ug1EbV~sda2b5Ykf7`T&?50ccdpFNZan1|GqQ z-vxKaJnLG|jl*lH3ZhayUOTxPPYf~Kbqk<+gmuzQ6kHCGyg6# z$oXxbsVHM~=i#AMvPR6v<@c;DcD!GOlQxDcqXv9H)}rbYr+kjl94AkTBO#ocJq#?g zR_gm2Z5*6E2^X^?XO^mBtD`^~@=vaJ7MBLlaCiddzkU$!Xd6i+5`0`=L)rOYbt^Om 
zP9{iNZtnB$^F!{74;NI~K7Wf+!3`dw!;(+5pV3ihM&zAX7*YLZGO{)QRvYpstqcqq zIwDED$xj5Fv(}rnF;c}2aU8!M7d~1v1*RfZfSKHBP59>$V=u$EoeKpGvzPH876s-~ zsc_y5*IX(x;SZdFsbh_~rf;fmXp7@IYe>DcFLoz!?etRj?H?uy4Akz(TecPZ;;5+1 z`mzv;B8{!0l4K=={K85R8@%z&1cu(H_2Dy3QJ_blT5_)K-EgutA=07&bMwocJU1__ z^@{-h{6gctm_2)BV#3N&J`o7*VZQSZ)GXvFN;@MHqvrhH^1nYked9nKIPCdmls@?YmLxI4EkKHw+&leJ_B=UiWhc8-~|$DDXBk zE}AFpxV~Dd;N=5sczk9auxv^f#G7m5NePX2@qdbwSLYkX%Rl=;wS+*p<&TsD+-CA* z1zeCgbK;LQSgK`3aJSrUhjnJKsl`^d(9}g;x1;a<$QEq5Nm+nkx@-$5sro49Z_`NI?%qqiR;hn{*ZQ zL(w#ozL%WO8w8Z9@NVI8{4*5<85|sq3N2M#xcZrlqQe%+@Ge!!CaE-TWijJ(&3%Jq z&(P?wGQvPFVLt?pj~=_l4c$^tN|&TD88#%un*#6Y%~+yUK$6XKHd}ozx{Fc#<8RHG z-LVDJqo7%g!C8X)qPdA%7I;Ok3yW22F$He61Dwso;yC*b-k~x)ZzUx%L}WZD=T1|NwZi41cQfuawiI5?=$dZb|-QSjEv}^L^;`~ z)5u^-4LDyw3A}$a7XfA)E(JfZ8pXd9@dd0{rSh?3934aXBv|_3%@w{*PVH0hy4J^+ za`ppi4e;MH6M^O;G*UdsfNC;=l`b(|K0M-!|nfCtr$h06@%;n%ca zK%NX7Dc=ESv(_j9R4>0U&b8=;^$SV-Z5XI!^8%TO-;1@<@h{Ph#zNtrn1L#=3MA$6 z$H4OEKarmYFaCmpDZpnjx;xM^@;j~v3r5{{q^39+4 zVAZUGw*tGM3PR^`bP1f?m(GqXkK26mJs*5wRS&mSouIcu zC)K0XAT=~u6&xrZ6A|SVE)>83x@fs&6}V#sw{~zi20Q9okXQkRs7I6izrQ!2JFjBC zr>os6v>^sSd1VRyrRiMfwp?P|Z6by){x&Q-eng2^LAnqOQTY}~I84D0`<~z$LEO|u z{4!AFwh*boZZ-=D1M8+qBqjD+ugCtlny{p7?k}=T)Q(y-U;L6VlV(u-XY0xFDgxd- zmcqW7%VgkUiU((KYvz!$dJ0MQgc!dnuBStVljNyBb+^ZxLP8cbEz>=07{S1V<9#T@ z(aRLv#TZOfW38PtJ%Phx^2G`pCr0w}1j!{_pW`HLw2$F-T-8@m`rSF8CS2Zuig<2| zOyz>9g;p=8&UGi=Ot4TP>f^SWW*mX17>H3fH$J-*YR)_9KtutLawUlz*gkh|Z(HUv zuz>JW|Hvc;(ba4vE8v-Ze6|1v4NAiz3#8+71iU=W%tAHXtG#(L6*U|B=$Mv5u(|`^ zv=HS_o}astB)VP_EV{pKlf^(SQdRG!ZGt%!(oEU8?xd|Cv;VwM?@0T3l-dl8OTZGA zG8e3me|4~Ux`z^W!kIW;3tpgC&zZ-X-A`vnY1E}trs0D{2PFSt+rr#MxiKWE+?o2J zVWiYwD+ut3=Vs-c?Cq8RLxvna3xi_>%P~5SCTF?DMg3*6LP+vda~|%cjUF^B=Bbvg zAA*;lx%nmyQ~&@jK1=JlM04i}FpPk2&~WzKY&V;XxkC>qP{i#!q>1504DGGpDq+uOWQsfYE=7!48#K* zHpr0BIbA)D*to+GHD7sCx0($C#56ZzulGy(9GR?6CQ`x^xr>-P-hMe|vkKO2D( z6@BAm%X(86mul5gpqCj|Ss`Mfl#)x^eY7``gzB*t{%c@$16YfT>!2MsBk!Yk|HlT{ z*^#?YXdPY_>LR||<^mWU0VB*fI<0;~Km-?*zSRT{`whE>fPYRt8eZ?B*L)MeM&ucW z`wogqb0eBlTc7mz=>3{X1!tJ-_>WXl6Cv00FBjTtGSwBjc9Htp4A{s4J-T}3v-}&) z80oav9XOSF|WsQhtek|3waAUkbXrA(?l^nKXZ_^w{jOeJa82VoeWUt(AlO5EBN@ukn@=v<$m7UpmK zS+jQWyKYae#qU3qZJgOBa$TRb9apLH^9DOh2xH7OpKX5rV(e6MA{X+4r-Vck9zbwZ z=|87UC<#sz*`GhZ+k`PcQ(jp7QZe$PfX_wvqnqf@nyiBC2q`Kpt$j*#D*abUVeHI= zM{+~k=CYXzVyRa+6#UL--}As%Ap0cSZQKIC*X1bYzkGA~%WdK{-KFYw)za@|Zn}qh z4XZm@dI~$1WL&k0G;+jWbIAAV*=!LirGZuR$G`CtS)fdaI#s4d#>9ed#EYot8h)50 zwH3Z?_aoKEE3Z7+7nsy0vIKgd93!7AWXHzFfM^;Y$_GV6UOfHF$oaufrHO^SPBDQ% zl7PDF`77wQ1RGXnRu%TQ4%0wu(AK0`Ue)&7HOEr-5U@j?cqI`ZNyJFk&(uS&;&?{@ zEf6r)cr$Un84fS#i$|0OT0bA6X=xhwlZU6cULo z$w(kFZ$8+@U0UAb=r+F#T2{`=47Bhj6);SEZ!Ay7Wb{htDG~<(B9QCdqA)g5I9yT_ zCwlV?O?faL{5g;8oX5c57Y|RcJNb9Khgaq!>Is|qoTwl5^L#Q`rfbn z(lEnrurHYUvnF)%=nH>BVrH8X<;JRb@wErzhRCo8BRF9_il378i(A~Gx~E_^%NTIR z0W29fB-Ab@W52a?D4)Ic3pza=8y{yf+EtM{%knH~37-t8{0608DF4;dN}6S_Qhby7 z9Mp-31nM)+eoFtb-1CnSuZ|eyhd~E25`OjjuR#xib_iIZz2O$6LA8AMLbP-zh|v4s zPz={*if`=Wahm*#5pyE{{m;-bqcYhP>nDH=!)Ma`Q6^mQf;bl^R4BwOcbF=IRkS}W zs0e84yU#DA7h*#2Fz}S05PZb#@@)r_TkQ%!H)q;C22XwNH!^pZ51H!%IgA9X!27~g zu%)Z7Q=*<(uQs$8>sXw|07I!DwIXxjnz)cEFg?EEGvwzM5^4oR3Lp=*mR;x`wuCDb zf52u+DUq>L3>$Bzc(L&no5y4amKx4T!~<+W%qeVvHzVK-MUWC^vE|qZR0lt>G<_YC z8xMUs6p1CF0JGoToH#E}Ab=&u4&+7>w)!l>-*9ow99{|nUhQng=g-m(cq%unOm9mu z76qTTG)+n8XfeN_L)Vu=r?d&56F_nPs>271oCufUXbR}=Gpoe*#(t+K9{jW?6E^-_ zY!8c&@=wxp%ItjXJpUNlk0zeq02>e(M;upMG@xb{jdE4@N|80)+xC;ZH2QS-IXwkx z-DhXVa?e}!1GJ?Q=4oG|P=OkyhuN8Tk03KEEmlh%&uR}~O5cx)%a_sG1f^rM>Mj&s z4&a^ge9csZK^>JJNRJwf-7>dCM+f4`0Xusj=5p0F<_G*Cf$|PCvINv$=PY^6+Hvt6 
zepuo3z9jQq$E&FKIpJRsXRK%M`~!M2vV^?iu#|Pyz5Ia3`nad&gCP~wX$<~fT!Bb` zR;e=%+oC-PFDa}e$5#TjI%azH?w$?0I%N(p z9aS~f{+#jFWPvVPx!dE)aQ+i|-9xM|DQat+E~ysvwoj}*juG{l)0uDU{CB6Pr-=+W zB144b67lp?tqNK2U35`%p;ga5k`d%ZVo>VXeJt$O+;eQq+`R#8r#U@@Zo$;4XZk&v z880vIBXaj-p;}^iWeDIu7MN9%Vs8vds?#x_M7a1N{Kv8VKHc62g0iYa<`i1R8PCQM z-}C-{v;xN>)gO+b3K(c9vidamFcsQ>i!8V4_$c_~kA+vnR5@oB65lDir9?isRmyOZ zx+z6%uqVnr9X*d+d~dDRi^PEH8q@AuvB@M8&-R?|L~UF^uD`#;UnaoKaQ=U z3Y@N2zsn}66It!-?3^>{DH}8{k1-2KUxB8D0w!7>ZWSH7Rv#_Kl;#*Xi+bEkkq(4W zWPZu}THQl9(>Un1je`29_%M<+9HTmk^^<^dvM|Xxk^32u0I(@sI^UEFPXK1{qv2|@ z>TQLJS!Cj{>Wnjea)N2n>9l&17s+;E`58$5nZLCkIJmx3?ae%11{VcjcZ<1+Vd!5xiEX9MpZ5O{$;iVvnYv{9>^QU+9rQ>%pp*l^ueNZS2#nQDKBT2!B z1W&UF3Q9{_7f#bfB{6`iKD1}v1OR^E55Bi2dxWs*cY z;g+TsSOtRi%#YsN4>C3xQe76bcpnnR3i5xCor$EI3>lrVo1;;r^s(`4K8U}xLZK&MtFP%Q{sIte;j)rP&22@@NI(mEz%eH&da)=hOBsO+BwsKr7p%97WqV&J5Ra zMxn3kkLfsY6N>mO0&Wlh#UTJT70r=~`lsJWBr2N-{vy@%Sb!6c&$u{>ww(j0i!L*1 zYiM|E00de=&{?Vu)Fx(drEeOH$B_NZtL)v&SWI%0T(ghMFxYjBj1FRQxR;2 zN@Z)5L4-RaNy!*|kAkr%Ypi~5P5@$@LsUAbs~5fivkWL`bUHjiZ&$<&cwP` zUtcdg5gUorB{;IBYMew!H{q8EQ}Hv~@@^c6TL+g$BQMQr`4s{5=Ufh;MxiOQGd$0I z@Lt6Q3b+^Bec{*qY&UAo1m>J@{RlMlb)R!=q~UR)$ZVQ&Yy=R+>BCE$ zW-V~VueSTQ13ZlrJUf!^rh+4ao&}m)B0dWYQI;88pl-SQu~VU0Hk=G`xw5hXR@spf zP|nQ%YC-@@?DgJs`g3*lR|4$FeJAK5HlNQ}vWj<~jQ=FsZkPU{wtyiGV=;w@f?l;t zU$EY~P_VJEzz_KqXBgy+&u_5bg%6Z)Ty*Lu6J0SQxtR^HxMUD~xY(t6r7267j4N33 zutCm%J8I439bDld%kk;o_<#Ax6l%KKMbSZ1exmTEBb4ob+mZ*VHXrA|iGQQZnjoBL z5{;AzzmtNEKdVj@ZG8o+!@*O@6Bk9s3jUBeNGl~tCz&Z02=1m+>gf( zrHz8_o6{ct$_MeT14E?S?#j_$Fk4Di1=AG}#rGqW+-1u#($fQAl>3>-d_LCLG30DB z{gH*{v6v`4VUTm-`xY=LffqC*J$-Ru!T9$F(O)4Zd0L7a z43%*Mbl+PRP2@N$C6{k=)xu>DT=-|Fw$UbhlBZ#`8gv#Gkle>cLnFIv+Ap)V4j}jq zDA(2x$XKZ=geK;j{BwoizO{XzsFN3>XUD-ZBV=E=(F8Kin+zt~yd#_ttx|QR&mG=7 zqolRsx`$VCyI?&0^w|LJqwWO|ra$GlA7^^e;DYpjf9{V|FsZA*AnyC6cV<|G2Sy4j zAWh;zQB_riHT}hAm?uTNs1g2z{7dNjWi@kFsH%2HxKV>)6}cH_CRhJ?hb^vuN8g_; z3!Orq3&Xibx~Au2=E=nP#XCkGOj7CUw7;@s z6J$^-@v2Wn^Jqw962`r1EG$KZ^e_ez;p0y-7S)uEm}WX?FfT6I%pZtfFW>hNTk7?z zH2+bMFtxi7kx)tC*dgetF}s{v|J^4iD%^4X`)>8fCViE7M2~mb4Zy zCNxh`kb9>$_sue;6$9ihn;GZw$Ae4T#_~2ged7wU6MAiAlHIQ?cu9Epi+qFYnf&5K zPaZ^slu0kYcJDflKBTBiwA(_ zsYByK>XUp5q<4fgL=rFI49?qh#!dD1@z0B? zgZ>mbofcNq4{n=J(U_{n1B}zzTp|!%a{SA^Ds>Sp%>Z#fgDt57!(=GJ<{N~mmCtHw zYMu+fT)xlL!o*M6$#)|i=qj?Y&G!cC+;$JGEFeSQ)LH>4Gl^O0>?iT)MH?ACe8I6% z!08`24uO*a0Fz$4B`*Czk-MT5vJGc|0vfO~LUXy@&kG!GYwXR;5O4)-xsvFgeeC;? zWC#cL6=^WXhb8e?R#tDzlg5HWv)%0zd<$_JLq@31<8N!(mlK;I6Ycd#$_MOJyd31~dq@A<7aZo^AP7!8&6~swOginD_s$$VH z-n^oQdM0&+?1X80G7w_za6>!HV!`lT6qc+9LpdHZ>q~{|L_o%c#(JGKbaSW0)2tU3 zqD?TIMu|#ZyJejlF6NZl@I2|lk z`h7=*aGzGntgDozc4_}&U}D#9?)R&`SAs7NKLs14&>FAR>|1$_p1H>Iir z*8ZEZt@lQ1Mq$xI{F{Ba38IsA1rGg(h4}l0T@dI1{DDac+W*8(6foLSN;`=^c96C<{(+u6#+v1Pa;|;bL8s_) zE(XpcPRhonLEr*_dI3+6s?v+NK`v!8>OU1yWb==1IN{zR9D>e-E6#t#2dAag9J`A{ z`6}S_3|S`dfGr`eFnx|Xh88igir`BmQDuFj9BHva?LQy>JAmSvs*+WLo=;56s6gBv z*9zsCB+S?T)-o>AU%iB}Q@!EB?~e2`cLf9nEO3{Od3T0M+(11D07YGr-vgaUs^CW! 
z*QIW%#Zl_1yP!=*1E|_Vr1okQ5vIMhT9lS#l_gLBcoAV#2s#;L5=7$W=pyUpaHe!K zVISbrhAq|AQ|;b~UbG{eY1RJZkxu=+)pNjN;MKaiK(ec6oK`}B??TPN+`sCWb{9`2 z66g<*X}~3ai&(#X2l8YOt6mR3-~FTIi#E{Kb{+?b_nqa@f`zv%e)O7$)SpY)6{}DB zIYIT?mKs>&sQ~>9kg^{UEH-1ex3{3?2*}_D?em8&OJ5&Lv%%;rVB^WlCb!cLAPdk& zdRP6I|8bkpE&|qomu-$Jg-Z~cbu8=oQ5hSvI&F$G87eRRMtGJ69}}0?Wj?wZ_lpbK zhiN-_f_}dCxs?ek^XT9=JK{!MdHJ`c8OWi^`}^FBM3P%Scc(A^>*gSWC1FZ4zm|+N zPh}lkriAs zfI<2-z-ZwKQ5GtC?+ZbP95227TRHLyG;Y?B0$uD}d9m>r;jH1FgK5A=_zX0!tC_DK zJp=B4gCtQ-&@V~W`}-fb0ei%N>wMEM-ZPEYCnz7SMsBNzMVY91?uWk6tVZ_A?7cJbTwvGQxv|WJ8ERwyp0)J zGQXF#ydeMW5Ga~CwA4~;FUQ5JsXCD?XRak{fW;Uk|5_iCfN3fXy&i2`@k)a`U%Tyu zO8>cRZA)W&9vRL;5&CAOjjy|>XLX{Z{qOn6kK!NBBF5C+5f*@bmIlJr}oCn6Q2FI(nR zw{(^wKMqZs5xMXueSqdIaa@;*mbg$+Ixp4rPJ=`dG0Ggi_{^6EoMLleSQ5;Rg3qDsmP5z@*2P?RI&U|31PY zwi4h$M`?-yEc#)o>VCESolh!?6kyK|Z< zlJ6E3e7!d@MKJs%Pns&dhb0SnhAF7LQu4dHvD4yLLMZqtnFAcwd(qGfU71zzpKIGF zDJ44JZQ>fJ<7T?vz{N5mq0)hZ54R1A>R4Y(UOYgtc7ZR64Wo1$M z7*q3bZNfhA%&G!T+&;UFgA0fsusA}#0$q*-8@UX9e#%`?vNs`9tPo!%3;n}nAYIl| zHqR4pvx|flVB2aAl(UlaYz$U!m zY)Ssm+@SuJv{FA(Lg5$$WM(c1b~Uw_vs1q6^zE;JV=%MRnUnDB|Uv92Skm}~ZYx;ZU z=_?X5aNa|J^w-~A2}-up0O#OcAFAWCsd&mYvYq*mbW{tUmsXj6yE3f=s_Ob(Yujoq z!B>oY`8tf#JDELr{DR_X??mkk)gX9FNG#M723?$bh6{av-l}NnuxnDn@Kb4&E`tYS zgX?NfedvrP#(3X=5`y#w3`vG5ZB44ff0j;mUn_^;5&5m2_dAYOTTKVgl(K^`qE6hc zA_mMzT)6K-64=R$WV*QQinO!5$Lp)-;)-X$RFpgvj$Ei%8D*HWYgU5ksu#(-NQP|~ zc)vX~_Qp&s_*0QPFKhO7J^YU@gu2IGSF6Y?yJ8EU0**tufKe3i@D*$7lluInF?;Y_ zm{7Wc>589>DW>oZ00K7qT>pz91O^iuVwRI>(<-nBY*ODdz~jD|(KwW*IX03jHGFFa z{M{$Ps!Yk!w3~Z#xLgixUPt+_Xs~5abW#xrgoC4_ARAB#1+(Wo6hnn4+?2t(<+b!I zMZeS`G@qp4Rm37P#P$Ur$3m2W2C*bfU>v(74v~6su<1rXpnfRlN+-Hc|l%-$>(x!>0*qj>U0% z#DI?+(SzL0M(rTcxfAI`uz_#6p!^(BZwaMNvr%UHEqh)t6wMa6zkKR{`e9($uBkc5 z#@npL!|u3a3wUgZj&tV+cnA`0cz8yseYweI4cmy1Q?3mqei-yoC96`VkgJ4_#m?-G zmqA0t4g8N4o!c-a>2%nP=H=G^?6=-IIcyOMtt!3cf$#O^p`*}4b20E!rpBA6%n38x zpE&1uATdnTUsLsQ)f&GVjc_c01Vhv3Xz=n^v8a(pIyyVP;I+G{H(x%tD<(T$f7N1zL1!{MVKk)Rd?`4tu>rSdsi zerMwk5fq4Gtv1}JEyN^jCMbaceUz)#x_8#%qoN$ge;m0|Uy4k%mOR6pw@n;rBX>TA zDY5hhMPqca6w&mBB$rU&uNZt1eRAoWZAuN*Vl||SADFqn z-*s=5gf72$n)0>tu}r>**48vOqqmAT=A1OksKH-^ zP-r0hr+A5C`g-q;mSV!$`NNfLA=qw)UY0l(r?ff|$|uW1F+C&&_x>M--UXh21Xp1M z?k%WnEJmlVsc-|9EO1S%K{p$HxBe&5?WD*nFc=>H=Bx{BC3OLpA9&0`@jTj9Nd{LPXEx{XAQGE*YO#2(2;(` z!+epSUjx|EDB9uHCuZts(MXIHUX0 z;~g%Sv3yYmcE=!Ef4^|Hz}9+9KQ7Ec5?jNfx2s50!sHYaq6B`;Z5cRN50fn|5mSXG zt0M(JowE;9{zi;qiZ7y0iywh6iw8kLLaJ`3d*ftcWwoq`p{RcZJ$aoKn@o;ggM;9l zmWm&(R}}vHT@v3{hnF^gR`5*B8Ka^2r*u87nq7d?WMjaCrhKwQDsJ3P+ZzlbM|&fm zslJrI+*}tBQJvImr2!opD77xoC|1tEou|cRODfeutIANa_uq};X}iQwje=dz=(e|T zy25!55`>Kcdj6y;uIf4OTQwbjcQZRYBK(4A_6YF0=HBI$mqnEO#W%CN)|s*mU%ztbUzn#5%jMeT$=Xl z@q$yXttoGnNUhsja=pj$UuR{DUwOx?5x! zd7n@2tNC5g*oYzAjl7FimfuldxClxx%_XsT|K-2oq#?7sDN)XasDUDST9=#Anv7RF z10~Zb1N`?%%kSyAjU=#i$-hw^c14I1j+v~X2UJmK&0A|lAG!|uo391NU1(OO;L19C z)E8!VX!+ivCL+aHmui>k3fm#|{5^j8cXce=0l$I?OH=5y)eOi;$g0jiG%D-Ldi_qD zfA`q=^^i;8)8J`o_P*8Xm71-e{{WsVbrGIe2p(KBd0XuIg93`fkWZDKh3c;IT#Q|F zJuURqhT__ryVhY(p zf+VX1?-ACh+|9yX_;RKLqmsnh9ROCWQ<4I|>c1^5uXWOvA4Nav=O2D19Vj<0uL^gb zvTLW9+Vd6Oym&a(@4VXbr@4h+sc@TWk^uh4mZ&l)AO!s z-tsiY^Uo;tr1_?(S*jTKFEf95ZD0j~Ac3=+ zJS)57$F9&{n|;jgTi|0~Gc%Y6OCe|tJ^3C`jLe25X?W0Lxt-ozQ-7V4jW{IJ&6j|s z4@Ko~WYn^ZoUf3)a?o}%nEy`1yR^D$u>(#Kt=XWFS_^yXJJCUH-HSfBCC7&a-aaLx zGMh|sH2jcBf!*h8)1M?1`NiG-AzO}L2*Y^({Vo;>xJt{pUUND<{R?IRop*D~LHCc? zM@f86Lwya6G**L}xf73TB(n3bovoiO7AHIdQ!N&s7f)RKzM;np7%X~iraOBVViC?9 zdt#tmGp%Uk!P@u%mj`&huPN)62`Nz0_%)A?f!Q7#S*=EvR0?94l8k0*bE0eRS7_o6G z`15;a!m!L~iKC#gnD5<5#{Tb}I!ajv)2CHLo)e@kgAfci5n#bTji^&oSGmf{w$F*Nx=+Q*OWY1PoGzmK! 
[... remainder of the base85-encoded binary payload for scripts/motus/example_inputs/first_frame.png omitted ...]
z-iYRUMHUSXfnRt0vFY+u38XR4oIrXs&t2e&KXKt56;coRYtQHC=ey7?xU?><ZMs{@PKY@PU?A zIeBb)>sQQvq{V^trJBWvBz-}16r_r9@^jywq2e2HqOH9=>b%z*LQ=@(+YXU^A?M-q ztsr%U^O*HCGzciahNXY|`?1aTTh3iU~SFd09Nk_o}XzagzQ-)(l+VQI0 zTwzlEfd%M%i26KZk;8)TDIZXF9#^I@K(Y9PK#YYJ52-90gS?UW#X3m}tR@CYP0!Gwv$? zVMKyK`Z#zo(02p1)zuEB(Zi+AZ*o^or0p;@B$$YVN1>1T+!==trzxHHduoBWppM_S z?Pr6T$9KP|Pe*pg6ge{St`>;IFRdQl{+AS$euZj|z+D9O$wV7M?PSe73AVmKW)0zoTW%JTQx-0B?&SZFU|a8oS!zY!0cBLRu` z0xtBqo135xAafki9>Yx+^$>*}L1F$Mg?S84N)V)~@I%vPcH4o|1VwAsv*_K?iH9TZ zbF1$kz{9*hEn^F|KX4lR0t*ad-!a8A;Bjzsasu`0oDyF?EicqS_U3Utj$hrsKGFMW*rWI18X*+*UNLoT@db9vnHW{+*D2=83gD1mivp zG~&?F0P0^BW)wqL*K38ZqC-m{G^apP6dZ5@U|~0KyOSB?{1FDP_z$y%AbrK4o(7li zXHO`{d#Bwt0gCQ|1w|V%-sZVetHBt~u_*7pAr?!$bR*!``>@?7bP$s=Tg``WUp~JXc%B)?h>rb$!)ZGM zb~fQ5w7;+b7NW@=oSXtiZL*49--0+#tZqR7K`?Sa2A~Onqas` zbdg4uN-+s1MkxZCM4_aD606efzWF8lo%WZ~^v^aj5CYZMG^i%1Jl5gCcc7KF)S&8n z+vr#9n*7=wL{XQS@QVkEI=t$ftt^K}kwjuFK{u^ut_+hV`fxVJ-GUq9D^Ys)8tDe{cY>kWT^hAuz@!OD| zHxu!(gcPNojxVElUR@$@#M=*8%>W_WlvxafU`)mBlS1a&>HB(61|^TOlIUoz286+l z=WZ?ZDhDuaG_ClI6i&IyA5m`aGJ!H#+XM95)<<(@wo@}m$9uwYoZi;&tVKnJkbpt| z1kLmhy9xnk;{L~|e|Z0VSsU0RH$8m`mdvtzzX*mII zK206EScZcxgJk~$kHSwHgwwM1w!hzL3kllTr{|X#NXzbvuk+jFtEei&cwjmhd-cjF zpLmU;rO$5eb8`IZ8zklLfi+O^bBSN|kP!Hm#0noClKh$>!H!= z36(IV6iL9L%m&#;Bqxr9A~=jY>YPkb#f>F8hO?xcr#yf}d`Cc~N+;`daj;(5Xm5nE z2NJFm9vbyKr{lZ}PN{%w3SHUa5guZTP9B{LB2Rub+9S`re+n}aLlgkcd~|;E#a=N8 z)a5;Y(!wD`bZ@wn?liAU27vNbW-CU%gBF&1!VAHux}^9rciH^MC+{!{EZaLf1Me>F zi`N1F;(jaF2Jd4z+G=VVgfngE)z-W0;JoE{aAN9beVwP9iNJqB)yJJr9el4wt@wm# zK1#mo;&zd%odskriX@p+1uJ}X`(9Nf`gjf2#`BS zJZw^*Iq&HlJBK{3@Nn6-RT{VNuZ9=A+J4m0pmCd02mkSndIp5TU5+Nlgkcw1S;aPD z9ojYhT0OS1sj#lw{gS3Jf+e`x-VhDfQ8mUO07-TaPU$kFKKnu{9|M`u40UY3dK<$t zN177OA{aiyY?=a|N|GO?QjM1EeHQUyiyShVUi7!65IE26-BMw^8c9#n_o^`F2-BaO z{*p`n`e;K~7@b_H)O9NeUIGOkji-mlG4c<7HTJ)YKJA=|5Y7}j@FB%1=_f-_pa7!R{HC9?^8j(?6XD#U|FO2r+?ktNu<<2-xpuq^71mprdbUrC^R&5Y1om; zfQTYIF<#4vaRZQQ>&?#SMnzwj_ANnF|(hXMDZtmgw#5bOM-?rSAUJ(Q!Hz zcSSfy>ARgBwY7a`(Fc?(fDQ6}=FTC9pi@L;bq_8_`TzgN3HQ1b${#hsZ%~<`Wf1UB MQT{EgQr0Boe@BZ$g8%>k literal 0 HcmV?d00001 diff --git a/scripts/motus/example_inputs/state.npy b/scripts/motus/example_inputs/state.npy new file mode 100644 index 0000000000000000000000000000000000000000..16842ec92275b876d4ff311883250663780f7e2f GIT binary patch literal 184 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+l>qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= zXCxM+0{I$-COVor3bhL41Foy38T)@MVskkA?V0_Pl}q=Bte;_{*VJgwz|a81xIlwF E0MUOcTL1t6 literal 0 HcmV?d00001 diff --git a/scripts/motus/run_motus_i2v.sh b/scripts/motus/run_motus_i2v.sh new file mode 100644 index 000000000..58a68f895 --- /dev/null +++ b/scripts/motus/run_motus_i2v.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +# set path firstly +lightx2v_path=/path/to/LightX2V +model_path=/path/to/MotusModel + +export CUDA_VISIBLE_DEVICES=0 + +# set environment variables +source ${lightx2v_path}/scripts/base/base.sh + +python -m lightx2v.infer \ +--model_cls motus \ +--task i2v \ +--model_path $model_path \ +--config_json ${lightx2v_path}/configs/motus/motus_i2v.json \ +--image_path "/path/to/the/first/frame: example_inputs/frist_frame.png" \ +--state_path "/path/to/the/state/at/the/first/frame: example_inputs/state.npy" \ +--prompt "Example prompt: The whole scene is in a realistic, industrial art style with three views: a fixed rear camera, a movable left arm camera, and a movable right arm camera. 
From b38c53ef0fdb63fe944e3de01ac1c8d871075ab1 Mon Sep 17 00:00:00 2001
From: zowiezhang
Date: Thu, 9 Apr 2026 13:27:04 +0800
Subject: [PATCH 2/4] fix: models/networks/motus/wan/model.py line 38: E402
 Module level import not at top of file

---
 lightx2v/models/networks/motus/wan/model.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lightx2v/models/networks/motus/wan/model.py b/lightx2v/models/networks/motus/wan/model.py
index f6c92e42c..659cef713 100644
--- a/lightx2v/models/networks/motus/wan/model.py
+++ b/lightx2v/models/networks/motus/wan/model.py
@@ -1,5 +1,6 @@
 # Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
 import math
+from functools import lru_cache

 import torch
 import torch.nn as nn
@@ -34,9 +35,6 @@ def rope_params(max_seq_len, dim, theta=10000):
     freqs = torch.polar(torch.ones_like(freqs), freqs)
     return freqs

-
-from functools import lru_cache
-
 @torch.amp.autocast('cuda', enabled=False)
 def rope_apply(x: torch.Tensor, grid_sizes: torch.Tensor, freqs: torch.Tensor) -> torch.Tensor:
     B, T, N, CC = x.shape
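For context on why this import is hoisted: rope_apply wraps its per-shape frequency-grid builder (_make_freq_grid) in functools.lru_cache, so a grid is built at most once per unique (f, h, w) key within the cache's lifetime. A minimal standalone sketch of that memoization pattern, with a stand-in builder rather than the real RoPE tables:

from functools import lru_cache

import torch


@lru_cache(maxsize=256)
def make_grid(f: int, h: int, w: int) -> torch.Tensor:
    # Stand-in for _make_freq_grid: any pure function of the grid shape.
    pos = torch.arange(f * h * w, dtype=torch.float64)
    return torch.polar(torch.ones_like(pos), pos).reshape(f * h * w, 1, 1)


a = make_grid(3, 12, 10)
b = make_grid(3, 12, 10)
assert a is b  # cache hit: the tensor is built once per unique shape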
From bff8c153ad4355c8b706298f71b9ca7825a2ff59 Mon Sep 17 00:00:00 2001
From: zowiezhang
Date: Thu, 9 Apr 2026 13:55:56 +0800
Subject: [PATCH 3/4] fix: improve code following suggestions from gemini-bot

---
 lightx2v/models/networks/motus/core.py        |   2 +-
 .../networks/motus/infer/transformer_infer.py |   9 +-
 lightx2v/models/networks/motus/model.py       |   8 +-
 lightx2v/models/networks/motus/primitives.py  |  20 +-
 lightx2v/models/networks/motus/wan/model.py   | 263 +++++++-----------
 lightx2v/models/networks/motus/wan/t5.py      | 218 ++++-----------
 .../models/networks/motus/wan/tokenizers.py   |   4 +-
 lightx2v/models/networks/motus/wan/vae2_2.py  | 148 +++-------
 lightx2v/models/networks/motus/wan_model.py   |   5 +-
 lightx2v/models/runners/motus/__init__.py     |   1 -
 lightx2v/models/runners/motus/motus_runner.py |   5 +-
 11 files changed, 229 insertions(+), 454 deletions(-)

diff --git a/lightx2v/models/networks/motus/core.py b/lightx2v/models/networks/motus/core.py
index cd93ee98d..04670f8dd 100644
--- a/lightx2v/models/networks/motus/core.py
+++ b/lightx2v/models/networks/motus/core.py
@@ -1,7 +1,7 @@
 import logging
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Dict, List, Optional, Tuple
+from typing import Dict, List, Optional

 import torch
 import torch.nn as nn
diff --git a/lightx2v/models/networks/motus/infer/transformer_infer.py b/lightx2v/models/networks/motus/infer/transformer_infer.py
index 2213d4fa7..ada74ba79 100644
--- a/lightx2v/models/networks/motus/infer/transformer_infer.py
+++ b/lightx2v/models/networks/motus/infer/transformer_infer.py
@@ -88,7 +88,10 @@ def infer(self, weights, pre_infer_out):
         scheduler.step_pre(step_index)
         video_tokens = model.video_module.prepare_input(scheduler.video_latents.to(model.dtype))
         state_tokens = pre_infer_out.state.unsqueeze(1).to(model.dtype)
-        registers = model.action_expert.registers.expand(state_tokens.shape[0], -1, -1)
+        # handle the case where num_registers is set to 0 and registers is None
+        registers = model.action_expert.registers
+        if registers is not None:
+            registers = registers.expand(state_tokens.shape[0], -1, -1)
         action_tokens = model.action_expert.input_encoder(state_tokens, scheduler.action_latents, registers)

         und_tokens = und_tokens_base.clone()
@@ -118,7 +121,9 @@ def infer(self, weights, pre_infer_out):
         video_velocity = model.video_module.apply_output_head(video_tokens, video_head_time_emb)

         action_pred_full = model.action_expert.decoder(action_tokens, action_head_time_emb)
-        action_velocity = action_pred_full[:, 1 : -model.action_expert.config.num_registers, :]
+        # handle the case where num_registers is set to 0
+        num_regs = model.action_expert.config.num_registers
+        action_velocity = action_pred_full[:, 1:-num_regs, :] if num_regs > 0 else action_pred_full[:, 1:, :]

         scheduler.step(video_velocity=video_velocity, action_velocity=action_velocity, dt=dt, condition_frame_latent=pre_infer_out.condition_frame_latent)

diff --git a/lightx2v/models/networks/motus/model.py b/lightx2v/models/networks/motus/model.py
index 9a23b44d3..c39098e9f 100644
--- a/lightx2v/models/networks/motus/model.py
+++ b/lightx2v/models/networks/motus/model.py
@@ -10,16 +10,15 @@
 from PIL import Image
 from loguru import logger
 from transformers import AutoProcessor
-from tqdm import tqdm

-from lightx2v.models.networks.motus.ops import LinearWithMM, TripleQKVProjector
 from lightx2v.models.networks.motus.core import Motus, MotusConfig
 from lightx2v.models.networks.motus.image_utils import resize_with_padding
-from lightx2v.models.networks.motus.primitives import rope_apply
-from lightx2v.models.networks.motus.t5 import T5EncoderModel
 from lightx2v.models.networks.motus.infer.post_infer import MotusPostInfer
 from lightx2v.models.networks.motus.infer.pre_infer import MotusPreInfer
 from lightx2v.models.networks.motus.infer.transformer_infer import MotusTransformerInfer
+from lightx2v.models.networks.motus.ops import LinearWithMM, TripleQKVProjector
+from lightx2v.models.networks.motus.primitives import rope_apply
+from lightx2v.models.networks.motus.t5 import T5EncoderModel
 from lightx2v.models.schedulers.motus.scheduler import MotusScheduler

@@ -122,6 +121,7 @@ def walk(obj: Any):
         signature = None

     if signature and "mm_token_type_ids" in signature.parameters:
+
         def wrapped_get_rope_index(*args, __orig=method, **kwargs):
             if "mm_token_type_ids" not in kwargs:
                 input_ids = kwargs.get("input_ids")
diff --git a/lightx2v/models/networks/motus/primitives.py b/lightx2v/models/networks/motus/primitives.py
index 7e5d642bb..5d30a1b26 100644
--- a/lightx2v/models/networks/motus/primitives.py
+++ b/lightx2v/models/networks/motus/primitives.py
@@ -30,14 +30,18 @@ def rope_apply(x: torch.Tensor, grid_sizes: torch.Tensor, freqs: torch.Tensor) -

     @lru_cache(maxsize=256)
     def _make_freq_grid(f: int, h: int, w: int):
-        return torch.cat(
-            [
-                fpart[:f].view(f, 1, 1, -1).expand(f, h, w, -1),
-                hpart[:h].view(1, h, 1, -1).expand(f, h, w, -1),
-                wpart[:w].view(1, 1, w, -1).expand(f, h, w, -1),
-            ],
-            dim=-1,
-        ).reshape(f * h * w, 1, -1).contiguous()
+        return (
+            torch.cat(
+                [
+                    fpart[:f].view(f, 1, 1, -1).expand(f, h, w, -1),
+                    hpart[:h].view(1, h, 1, -1).expand(f, h, w, -1),
+                    wpart[:w].view(1, 1, w, -1).expand(f, h, w, -1),
+                ],
+                dim=-1,
+            )
+            .reshape(f * h * w, 1, -1)
+            .contiguous()
+        )

     for g_idx, (f, h, w) in enumerate(uniq.tolist()):
         idx = (inv == g_idx).nonzero(as_tuple=False).squeeze(-1)
diff --git a/lightx2v/models/networks/motus/wan/model.py b/lightx2v/models/networks/motus/wan/model.py
index 659cef713..122b46613 100644
--- a/lightx2v/models/networks/motus/wan/model.py
+++ b/lightx2v/models/networks/motus/wan/model.py
@@ -9,7 +9,7 @@

 from .attention import flash_attention

-__all__ = ['WanModel']
+__all__ = ["WanModel"]


 def
sinusoidal_embedding_1d(dim, position): @@ -19,23 +19,20 @@ def sinusoidal_embedding_1d(dim, position): position = position.type(torch.float64) # calculation - sinusoid = torch.outer( - position, torch.pow(10000, -torch.arange(half).to(position).div(half))) + sinusoid = torch.outer(position, torch.pow(10000, -torch.arange(half).to(position).div(half))) x = torch.cat([torch.cos(sinusoid), torch.sin(sinusoid)], dim=1) return x -@torch.amp.autocast('cuda', enabled=False) +@torch.amp.autocast("cuda", enabled=False) def rope_params(max_seq_len, dim, theta=10000): assert dim % 2 == 0 - freqs = torch.outer( - torch.arange(max_seq_len), - 1.0 / torch.pow(theta, - torch.arange(0, dim, 2).to(torch.float64).div(dim))) + freqs = torch.outer(torch.arange(max_seq_len), 1.0 / torch.pow(theta, torch.arange(0, dim, 2).to(torch.float64).div(dim))) freqs = torch.polar(torch.ones_like(freqs), freqs) return freqs -@torch.amp.autocast('cuda', enabled=False) + +@torch.amp.autocast("cuda", enabled=False) def rope_apply(x: torch.Tensor, grid_sizes: torch.Tensor, freqs: torch.Tensor) -> torch.Tensor: B, T, N, CC = x.shape assert CC % 2 == 0, "last dim must be 2C (real, imag)" @@ -55,16 +52,19 @@ def rope_apply(x: torch.Tensor, grid_sizes: torch.Tensor, freqs: torch.Tensor) - @lru_cache(maxsize=256) def _make_freq_grid(f: int, h: int, w: int): - fi = torch.cat([ - fpart[:f].view(f, 1, 1, -1).expand(f, h, w, -1), # [f,h,w,c_f] - hpart[:h].view(1, h, 1, -1).expand(f, h, w, -1), # [f,h,w,c_h] - wpart[:w].view(1, 1, w, -1).expand(f, h, w, -1), # [f,h,w,c_w] - ], dim=-1).reshape(f*h*w, 1, -1) # [seq_len,1,C] + fi = torch.cat( + [ + fpart[:f].view(f, 1, 1, -1).expand(f, h, w, -1), # [f,h,w,c_f] + hpart[:h].view(1, h, 1, -1).expand(f, h, w, -1), # [f,h,w,c_h] + wpart[:w].view(1, 1, w, -1).expand(f, h, w, -1), # [f,h,w,c_w] + ], + dim=-1, + ).reshape(f * h * w, 1, -1) # [seq_len,1,C] return fi.contiguous() for g_idx, (f, h, w) in enumerate(uniq.tolist()): idx = (inv == g_idx).nonzero(as_tuple=False).squeeze(-1) - if idx.numel() == 0: + if idx.numel() == 0: continue seq_len = f * h * w @@ -76,7 +76,8 @@ def _make_freq_grid(f: int, h: int, w: int): # assert rope_apply_original(x, grid_sizes, freqs).allclose(y, atol=1e-5) return y -@torch.amp.autocast('cuda', enabled=False) + +@torch.amp.autocast("cuda", enabled=False) def rope_apply_original(x, grid_sizes, freqs): n, c = x.size(2), x.size(3) // 2 @@ -89,14 +90,10 @@ def rope_apply_original(x, grid_sizes, freqs): seq_len = f * h * w # precompute multipliers - x_i = torch.view_as_complex(x[i, :seq_len].to(torch.float64).reshape( - seq_len, n, -1, 2)) - freqs_i = torch.cat([ - freqs[0][:f].view(f, 1, 1, -1).expand(f, h, w, -1), - freqs[1][:h].view(1, h, 1, -1).expand(f, h, w, -1), - freqs[2][:w].view(1, 1, w, -1).expand(f, h, w, -1) - ], - dim=-1).reshape(seq_len, 1, -1) + x_i = torch.view_as_complex(x[i, :seq_len].to(torch.float64).reshape(seq_len, n, -1, 2)) + freqs_i = torch.cat( + [freqs[0][:f].view(f, 1, 1, -1).expand(f, h, w, -1), freqs[1][:h].view(1, h, 1, -1).expand(f, h, w, -1), freqs[2][:w].view(1, 1, w, -1).expand(f, h, w, -1)], dim=-1 + ).reshape(seq_len, 1, -1) # apply rotary embedding x_i = torch.view_as_real(x_i * freqs_i).flatten(2) @@ -108,7 +105,6 @@ def rope_apply_original(x, grid_sizes, freqs): class WanRMSNorm(nn.Module): - def __init__(self, dim, eps=1e-5): super().__init__() self.dim = dim @@ -127,7 +123,6 @@ def _norm(self, x): class WanLayerNorm(nn.LayerNorm): - def __init__(self, dim, eps=1e-6, elementwise_affine=False): super().__init__(dim, 
elementwise_affine=elementwise_affine, eps=eps) @@ -140,13 +135,7 @@ def forward(self, x): class WanSelfAttention(nn.Module): - - def __init__(self, - dim, - num_heads, - window_size=(-1, -1), - qk_norm=True, - eps=1e-6): + def __init__(self, dim, num_heads, window_size=(-1, -1), qk_norm=True, eps=1e-6): assert dim % num_heads == 0 super().__init__() self.dim = dim @@ -164,13 +153,19 @@ def __init__(self, self.norm_q = WanRMSNorm(dim, eps=eps) if qk_norm else nn.Identity() self.norm_k = WanRMSNorm(dim, eps=eps) if qk_norm else nn.Identity() - def forward(self, x, seq_lens, grid_sizes, freqs, - action_q: torch.Tensor = None, - action_k: torch.Tensor = None, - action_v: torch.Tensor = None, - und_q: torch.Tensor = None, - und_k: torch.Tensor = None, - und_v: torch.Tensor = None): + def forward( + self, + x, + seq_lens, + grid_sizes, + freqs, + action_q: torch.Tensor = None, + action_k: torch.Tensor = None, + action_v: torch.Tensor = None, + und_q: torch.Tensor = None, + und_k: torch.Tensor = None, + und_v: torch.Tensor = None, + ): r""" Args: x(Tensor): Shape [B, L, num_heads, C / num_heads] @@ -194,16 +189,16 @@ def qkv_fn(x): # Trimodal MoT branch: WAN + Action + Understanding if action_q is not None or und_q is not None: L_x = q.size(1) - + # Apply RoPE only to video tokens (q, k) q_video_rope = rope_apply(q, grid_sizes, freqs) k_video_rope = rope_apply(k, grid_sizes, freqs) - + # Prepare parts for concatenation q_parts = [q_video_rope] - k_parts = [k_video_rope] + k_parts = [k_video_rope] v_parts = [v] - + # Add action tokens if provided if action_q is not None: q_parts.append(action_q) @@ -212,7 +207,7 @@ def qkv_fn(x): L_action = action_q.size(1) else: L_action = 0 - + # Add understanding tokens if provided if und_q is not None: q_parts.append(und_q) @@ -221,33 +216,28 @@ def qkv_fn(x): L_und = und_q.size(1) else: L_und = 0 - + # Concatenate all modalities q_cat = torch.cat(q_parts, dim=1) k_cat = torch.cat(k_parts, dim=1) v_cat = torch.cat(v_parts, dim=1) - attn_out = flash_attention( - q=q_cat, - k=k_cat, - v=v_cat, - k_lens=seq_lens, - window_size=self.window_size) + attn_out = flash_attention(q=q_cat, k=k_cat, v=v_cat, k_lens=seq_lens, window_size=self.window_size) # Split outputs back to respective modalities x_out = attn_out[:, :L_x, :, :] outputs = [x_out] - + start_idx = L_x if action_q is not None: - action_out = attn_out[:, start_idx:start_idx+L_action, :, :] + action_out = attn_out[:, start_idx : start_idx + L_action, :, :] outputs.append(action_out) start_idx += L_action else: outputs.append(None) - + if und_q is not None: - und_out = attn_out[:, start_idx:start_idx+L_und, :, :] + und_out = attn_out[:, start_idx : start_idx + L_und, :, :] outputs.append(und_out) else: outputs.append(None) @@ -256,16 +246,11 @@ def qkv_fn(x): x_out = x_out.flatten(2) x_out = self.o(x_out) outputs[0] = x_out - + return tuple(outputs) # Standard branch (no MoT) - x = flash_attention( - q=rope_apply(q, grid_sizes, freqs), - k=rope_apply(k, grid_sizes, freqs), - v=v, - k_lens=seq_lens, - window_size=self.window_size) + x = flash_attention(q=rope_apply(q, grid_sizes, freqs), k=rope_apply(k, grid_sizes, freqs), v=v, k_lens=seq_lens, window_size=self.window_size) # output x = x.flatten(2) @@ -274,7 +259,6 @@ def qkv_fn(x): class WanCrossAttention(WanSelfAttention): - def forward(self, x, context, context_lens): r""" Args: @@ -299,15 +283,7 @@ def forward(self, x, context, context_lens): class WanAttentionBlock(nn.Module): - - def __init__(self, - dim, - ffn_dim, - num_heads, - window_size=(-1, -1), 
- qk_norm=True, - cross_attn_norm=False, - eps=1e-6): + def __init__(self, dim, ffn_dim, num_heads, window_size=(-1, -1), qk_norm=True, cross_attn_norm=False, eps=1e-6): super().__init__() self.dim = dim self.ffn_dim = ffn_dim @@ -319,18 +295,12 @@ def __init__(self, # layers self.norm1 = WanLayerNorm(dim, eps) - self.self_attn = WanSelfAttention(dim, num_heads, window_size, qk_norm, - eps) - - self.norm3 = WanLayerNorm( - dim, eps, - elementwise_affine=True) if cross_attn_norm else nn.Identity() - self.cross_attn = WanCrossAttention(dim, num_heads, (-1, -1), qk_norm, - eps) + self.self_attn = WanSelfAttention(dim, num_heads, window_size, qk_norm, eps) + + self.norm3 = WanLayerNorm(dim, eps, elementwise_affine=True) if cross_attn_norm else nn.Identity() + self.cross_attn = WanCrossAttention(dim, num_heads, (-1, -1), qk_norm, eps) self.norm2 = WanLayerNorm(dim, eps) - self.ffn = nn.Sequential( - nn.Linear(dim, ffn_dim), nn.GELU(approximate='tanh'), - nn.Linear(ffn_dim, dim)) + self.ffn = nn.Sequential(nn.Linear(dim, ffn_dim), nn.GELU(approximate="tanh"), nn.Linear(ffn_dim, dim)) # modulation self.modulation = nn.Parameter(torch.randn(1, 6, dim) / dim**0.5) @@ -354,23 +324,20 @@ def forward( freqs(Tensor): Rope freqs, shape [1024, C / num_heads / 2] """ assert e.dtype == torch.float32 - with torch.amp.autocast('cuda', dtype=torch.float32): + with torch.amp.autocast("cuda", dtype=torch.float32): e = (self.modulation.unsqueeze(0) + e).chunk(6, dim=2) assert e[0].dtype == torch.float32 # self-attention - y = self.self_attn( - self.norm1(x).float() * (1 + e[1].squeeze(2)) + e[0].squeeze(2), - seq_lens, grid_sizes, freqs) - with torch.amp.autocast('cuda', dtype=torch.float32): + y = self.self_attn(self.norm1(x).float() * (1 + e[1].squeeze(2)) + e[0].squeeze(2), seq_lens, grid_sizes, freqs) + with torch.amp.autocast("cuda", dtype=torch.float32): x = x + y * e[2].squeeze(2) # cross-attention & ffn function def cross_attn_ffn(x, context, context_lens, e): x = x + self.cross_attn(self.norm3(x), context, context_lens) - y = self.ffn( - self.norm2(x).float() * (1 + e[4].squeeze(2)) + e[3].squeeze(2)) - with torch.amp.autocast('cuda', dtype=torch.float32): + y = self.ffn(self.norm2(x).float() * (1 + e[4].squeeze(2)) + e[3].squeeze(2)) + with torch.amp.autocast("cuda", dtype=torch.float32): x = x + y * e[5].squeeze(2) return x @@ -379,7 +346,6 @@ def cross_attn_ffn(x, context, context_lens, e): class Head(nn.Module): - def __init__(self, dim, out_dim, patch_size, eps=1e-6): super().__init__() self.dim = dim @@ -402,11 +368,9 @@ def forward(self, x, e): e(Tensor): Shape [B, L1, C] """ assert e.dtype == torch.float32 - with torch.amp.autocast('cuda', dtype=torch.float32): + with torch.amp.autocast("cuda", dtype=torch.float32): e = (self.modulation.unsqueeze(0) + e.unsqueeze(2)).chunk(2, dim=2) - x = ( - self.head( - self.norm(x) * (1 + e[1].squeeze(2)) + e[0].squeeze(2))) + x = self.head(self.norm(x) * (1 + e[1].squeeze(2)) + e[0].squeeze(2)) return x @@ -415,28 +379,28 @@ class WanModel(ModelMixin, ConfigMixin): Wan diffusion backbone supporting both text-to-video and image-to-video. 
""" - ignore_for_config = [ - 'patch_size', 'cross_attn_norm', 'qk_norm', 'text_dim', 'window_size' - ] - _no_split_modules = ['WanAttentionBlock'] + ignore_for_config = ["patch_size", "cross_attn_norm", "qk_norm", "text_dim", "window_size"] + _no_split_modules = ["WanAttentionBlock"] @register_to_config - def __init__(self, - model_type='t2v', - patch_size=(1, 2, 2), - text_len=512, - in_dim=16, - dim=2048, - ffn_dim=8192, - freq_dim=256, - text_dim=4096, - out_dim=16, - num_heads=16, - num_layers=32, - window_size=(-1, -1), - qk_norm=True, - cross_attn_norm=True, - eps=1e-6): + def __init__( + self, + model_type="t2v", + patch_size=(1, 2, 2), + text_len=512, + in_dim=16, + dim=2048, + ffn_dim=8192, + freq_dim=256, + text_dim=4096, + out_dim=16, + num_heads=16, + num_layers=32, + window_size=(-1, -1), + qk_norm=True, + cross_attn_norm=True, + eps=1e-6, + ): r""" Initialize the diffusion model backbone. @@ -475,7 +439,7 @@ def __init__(self, super().__init__() - assert model_type in ['t2v', 'i2v', 'ti2v'] + assert model_type in ["t2v", "i2v", "ti2v"] self.model_type = model_type self.patch_size = patch_size @@ -494,21 +458,14 @@ def __init__(self, self.eps = eps # embeddings - self.patch_embedding = nn.Conv3d( - in_dim, dim, kernel_size=patch_size, stride=patch_size) - self.text_embedding = nn.Sequential( - nn.Linear(text_dim, dim), nn.GELU(approximate='tanh'), - nn.Linear(dim, dim)) - - self.time_embedding = nn.Sequential( - nn.Linear(freq_dim, dim), nn.SiLU(), nn.Linear(dim, dim)) + self.patch_embedding = nn.Conv3d(in_dim, dim, kernel_size=patch_size, stride=patch_size) + self.text_embedding = nn.Sequential(nn.Linear(text_dim, dim), nn.GELU(approximate="tanh"), nn.Linear(dim, dim)) + + self.time_embedding = nn.Sequential(nn.Linear(freq_dim, dim), nn.SiLU(), nn.Linear(dim, dim)) self.time_projection = nn.Sequential(nn.SiLU(), nn.Linear(dim, dim * 6)) # blocks - self.blocks = nn.ModuleList([ - WanAttentionBlock(dim, ffn_dim, num_heads, window_size, qk_norm, - cross_attn_norm, eps) for _ in range(num_layers) - ]) + self.blocks = nn.ModuleList([WanAttentionBlock(dim, ffn_dim, num_heads, window_size, qk_norm, cross_attn_norm, eps) for _ in range(num_layers)]) # head self.head = Head(dim, out_dim, patch_size, eps) @@ -516,12 +473,7 @@ def __init__(self, # buffers (don't use register_buffer otherwise dtype will be changed in to()) assert (dim % num_heads) == 0 and (dim // num_heads) % 2 == 0 d = dim // num_heads - self.freqs = torch.cat([ - rope_params(1024, d - 4 * (d // 6)), - rope_params(1024, 2 * (d // 6)), - rope_params(1024, 2 * (d // 6)) - ], - dim=1) + self.freqs = torch.cat([rope_params(1024, d - 4 * (d // 6)), rope_params(1024, 2 * (d // 6)), rope_params(1024, 2 * (d // 6))], dim=1) # initialize weights self.init_weights() @@ -553,7 +505,7 @@ def forward( List[Tensor]: List of denoised video tensors with original input shapes [C_out, F, H / 8, W / 8] """ - if self.model_type == 'i2v': + if self.model_type == "i2v": assert y is not None # params device = self.patch_embedding.weight.device @@ -565,45 +517,28 @@ def forward( # embeddings x = [self.patch_embedding(u.unsqueeze(0)) for u in x] - grid_sizes = torch.stack( - [torch.tensor(u.shape[2:], dtype=torch.long) for u in x]) + grid_sizes = torch.stack([torch.tensor(u.shape[2:], dtype=torch.long) for u in x]) x = [u.flatten(2).transpose(1, 2) for u in x] seq_lens = torch.tensor([u.size(1) for u in x], dtype=torch.long) assert seq_lens.max() <= seq_len - x = torch.cat([ - torch.cat([u, u.new_zeros(1, seq_len - u.size(1), u.size(2))], - 
dim=1) for u in x
-        ])
+        x = torch.cat([torch.cat([u, u.new_zeros(1, seq_len - u.size(1), u.size(2))], dim=1) for u in x])

         # time embeddings
         if t.dim() == 1:
             t = t.expand(t.size(0), seq_len)
-        with torch.amp.autocast('cuda', dtype=torch.float32):
+        with torch.amp.autocast("cuda", dtype=torch.float32):
             bt = t.size(0)
             t = t.flatten()
-            e = self.time_embedding(
-                sinusoidal_embedding_1d(self.freq_dim,
-                                        t).unflatten(0, (bt, seq_len)).float())
+            e = self.time_embedding(sinusoidal_embedding_1d(self.freq_dim, t).unflatten(0, (bt, seq_len)).float())
             e0 = self.time_projection(e).unflatten(2, (6, self.dim))
             assert e.dtype == torch.float32 and e0.dtype == torch.float32

         # context
         context_lens = None
-        context = self.text_embedding(
-            torch.stack([
-                torch.cat(
-                    [u, u.new_zeros(self.text_len - u.size(0), u.size(1))])
-                for u in context
-            ]))
+        context = self.text_embedding(torch.stack([torch.cat([u, u.new_zeros(self.text_len - u.size(0), u.size(1))]) for u in context]))

         # arguments
-        kwargs = dict(
-            e=e0,
-            seq_lens=seq_lens,
-            grid_sizes=grid_sizes,
-            freqs=self.freqs,
-            context=context,
-            context_lens=context_lens)
+        kwargs = dict(e=e0, seq_lens=seq_lens, grid_sizes=grid_sizes, freqs=self.freqs, context=context, context_lens=context_lens)

         for block in self.blocks:
             x = block(x, **kwargs)
@@ -638,12 +573,12 @@ def unpatchify(self, x, grid_sizes):
             # Trim any extra tokens and reshape into the patch grid, restoring the original patch layout.
             # Some implementations pad or align the sequence, so only the first F_patches * H_patches * W_patches patches are kept.
             # [F_patches, H_patches, W_patches, pF, pH, pW, C_out]; presumably [3, 12, 10, 1, 2, 2, 48].
-            u = u[:math.prod(v)].view(*v, *self.patch_size, c)
+            u = u[: math.prod(v)].view(*v, *self.patch_size, c)
             # Permute dimensions so the patch grid and intra-patch positions interleave.
             # f h w: patch-grid coordinates; p q r: intra-patch coordinates; c: channels.
             # After the permute: [C_out, F_patches, pF (frames covered by one patch), H_patches, pH (pixels covered by one patch along the height axis), W_patches, pW].
             # The next step reshapes this directly into the full video: F_patches and pF merge into the full frame count F, H_patches and pH into the full height, W_patches and pW into the full width.
-            u = torch.einsum('fhwpqrc->cfphqwr', u)
+            u = torch.einsum("fhwpqrc->cfphqwr", u)
             # The following line performs: [
             #     F_patches * pF,
             #     H_patches * pH,
@@ -669,10 +604,10 @@ def init_weights(self):
         nn.init.xavier_uniform_(self.patch_embedding.weight.flatten(1))
         for m in self.text_embedding.modules():
             if isinstance(m, nn.Linear):
-                nn.init.normal_(m.weight, std=.02)
+                nn.init.normal_(m.weight, std=0.02)
         for m in self.time_embedding.modules():
             if isinstance(m, nn.Linear):
-                nn.init.normal_(m.weight, std=.02)
+                nn.init.normal_(m.weight, std=0.02)

         # init output layer
         nn.init.zeros_(self.head.head.weight)
diff --git a/lightx2v/models/networks/motus/wan/t5.py b/lightx2v/models/networks/motus/wan/t5.py
index c841b044a..e52c11fff 100644
--- a/lightx2v/models/networks/motus/wan/t5.py
+++ b/lightx2v/models/networks/motus/wan/t5.py
@@ -10,10 +10,10 @@
 from .tokenizers import HuggingfaceTokenizer

 __all__ = [
-    'T5Model',
-    'T5Encoder',
-    'T5Decoder',
-    'T5EncoderModel',
+    "T5Model",
+    "T5Encoder",
+    "T5Decoder",
+    "T5EncoderModel",
 ]

@@ -34,24 +34,20 @@ def init_weights(m):
         nn.init.normal_(m.fc1.weight, std=m.dim**-0.5)
         nn.init.normal_(m.fc2.weight, std=m.dim_ffn**-0.5)
     elif isinstance(m, T5Attention):
-        nn.init.normal_(m.q.weight, std=(m.dim * m.dim_attn)**-0.5)
+        nn.init.normal_(m.q.weight, std=(m.dim * m.dim_attn) ** -0.5)
         nn.init.normal_(m.k.weight, std=m.dim**-0.5)
         nn.init.normal_(m.v.weight, std=m.dim**-0.5)
-        nn.init.normal_(m.o.weight, std=(m.num_heads * m.dim_attn)**-0.5)
+        nn.init.normal_(m.o.weight, std=(m.num_heads * m.dim_attn) ** -0.5)
     elif isinstance(m, T5RelativeEmbedding):
-        nn.init.normal_(
-
m.embedding.weight, std=(2 * m.num_buckets * m.num_heads)**-0.5) + nn.init.normal_(m.embedding.weight, std=(2 * m.num_buckets * m.num_heads) ** -0.5) class GELU(nn.Module): - def forward(self, x): - return 0.5 * x * (1.0 + torch.tanh( - math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))) + return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))) class T5LayerNorm(nn.Module): - def __init__(self, dim, eps=1e-6): super(T5LayerNorm, self).__init__() self.dim = dim @@ -59,15 +55,13 @@ def __init__(self, dim, eps=1e-6): self.weight = nn.Parameter(torch.ones(dim)) def forward(self, x): - x = x * torch.rsqrt(x.float().pow(2).mean(dim=-1, keepdim=True) + - self.eps) + x = x * torch.rsqrt(x.float().pow(2).mean(dim=-1, keepdim=True) + self.eps) if self.weight.dtype in [torch.float16, torch.bfloat16]: x = x.type_as(self.weight) return self.weight * x class T5Attention(nn.Module): - def __init__(self, dim, dim_attn, num_heads, dropout=0.1): assert dim_attn % num_heads == 0 super(T5Attention, self).__init__() @@ -104,14 +98,13 @@ def forward(self, x, context=None, mask=None, pos_bias=None): attn_bias += pos_bias if mask is not None: assert mask.ndim in [2, 3] - mask = mask.view(b, 1, 1, - -1) if mask.ndim == 2 else mask.unsqueeze(1) + mask = mask.view(b, 1, 1, -1) if mask.ndim == 2 else mask.unsqueeze(1) attn_bias.masked_fill_(mask == 0, torch.finfo(x.dtype).min) # compute attention (T5 does not use scaling) - attn = torch.einsum('binc,bjnc->bnij', q, k) + attn_bias + attn = torch.einsum("binc,bjnc->bnij", q, k) + attn_bias attn = F.softmax(attn.float(), dim=-1).type_as(attn) - x = torch.einsum('bnij,bjnc->binc', attn, v) + x = torch.einsum("bnij,bjnc->binc", attn, v) # output x = x.reshape(b, -1, n * c) @@ -121,7 +114,6 @@ def forward(self, x, context=None, mask=None, pos_bias=None): class T5FeedForward(nn.Module): - def __init__(self, dim, dim_ffn, dropout=0.1): super(T5FeedForward, self).__init__() self.dim = dim @@ -142,15 +134,7 @@ def forward(self, x): class T5SelfAttention(nn.Module): - - def __init__(self, - dim, - dim_attn, - dim_ffn, - num_heads, - num_buckets, - shared_pos=True, - dropout=0.1): + def __init__(self, dim, dim_attn, dim_ffn, num_heads, num_buckets, shared_pos=True, dropout=0.1): super(T5SelfAttention, self).__init__() self.dim = dim self.dim_attn = dim_attn @@ -164,27 +148,17 @@ def __init__(self, self.attn = T5Attention(dim, dim_attn, num_heads, dropout) self.norm2 = T5LayerNorm(dim) self.ffn = T5FeedForward(dim, dim_ffn, dropout) - self.pos_embedding = None if shared_pos else T5RelativeEmbedding( - num_buckets, num_heads, bidirectional=True) + self.pos_embedding = None if shared_pos else T5RelativeEmbedding(num_buckets, num_heads, bidirectional=True) def forward(self, x, mask=None, pos_bias=None): - e = pos_bias if self.shared_pos else self.pos_embedding( - x.size(1), x.size(1)) + e = pos_bias if self.shared_pos else self.pos_embedding(x.size(1), x.size(1)) x = fp16_clamp(x + self.attn(self.norm1(x), mask=mask, pos_bias=e)) x = fp16_clamp(x + self.ffn(self.norm2(x))) return x class T5CrossAttention(nn.Module): - - def __init__(self, - dim, - dim_attn, - dim_ffn, - num_heads, - num_buckets, - shared_pos=True, - dropout=0.1): + def __init__(self, dim, dim_attn, dim_ffn, num_heads, num_buckets, shared_pos=True, dropout=0.1): super(T5CrossAttention, self).__init__() self.dim = dim self.dim_attn = dim_attn @@ -200,26 +174,17 @@ def __init__(self, self.cross_attn = T5Attention(dim, dim_attn, num_heads, dropout) self.norm3 = 
T5LayerNorm(dim) self.ffn = T5FeedForward(dim, dim_ffn, dropout) - self.pos_embedding = None if shared_pos else T5RelativeEmbedding( - num_buckets, num_heads, bidirectional=False) - - def forward(self, - x, - mask=None, - encoder_states=None, - encoder_mask=None, - pos_bias=None): - e = pos_bias if self.shared_pos else self.pos_embedding( - x.size(1), x.size(1)) + self.pos_embedding = None if shared_pos else T5RelativeEmbedding(num_buckets, num_heads, bidirectional=False) + + def forward(self, x, mask=None, encoder_states=None, encoder_mask=None, pos_bias=None): + e = pos_bias if self.shared_pos else self.pos_embedding(x.size(1), x.size(1)) x = fp16_clamp(x + self.self_attn(self.norm1(x), mask=mask, pos_bias=e)) - x = fp16_clamp(x + self.cross_attn( - self.norm2(x), context=encoder_states, mask=encoder_mask)) + x = fp16_clamp(x + self.cross_attn(self.norm2(x), context=encoder_states, mask=encoder_mask)) x = fp16_clamp(x + self.ffn(self.norm3(x))) return x class T5RelativeEmbedding(nn.Module): - def __init__(self, num_buckets, num_heads, bidirectional, max_dist=128): super(T5RelativeEmbedding, self).__init__() self.num_buckets = num_buckets @@ -234,12 +199,10 @@ def forward(self, lq, lk): device = self.embedding.weight.device # rel_pos = torch.arange(lk).unsqueeze(0).to(device) - \ # torch.arange(lq).unsqueeze(1).to(device) - rel_pos = torch.arange(lk, device=device).unsqueeze(0) - \ - torch.arange(lq, device=device).unsqueeze(1) + rel_pos = torch.arange(lk, device=device).unsqueeze(0) - torch.arange(lq, device=device).unsqueeze(1) rel_pos = self._relative_position_bucket(rel_pos) rel_pos_embeds = self.embedding(rel_pos) - rel_pos_embeds = rel_pos_embeds.permute(2, 0, 1).unsqueeze( - 0) # [1, N, Lq, Lk] + rel_pos_embeds = rel_pos_embeds.permute(2, 0, 1).unsqueeze(0) # [1, N, Lq, Lk] return rel_pos_embeds.contiguous() def _relative_position_bucket(self, rel_pos): @@ -255,27 +218,14 @@ def _relative_position_bucket(self, rel_pos): # embeddings for small and large positions max_exact = num_buckets // 2 - rel_pos_large = max_exact + (torch.log(rel_pos.float() / max_exact) / - math.log(self.max_dist / max_exact) * - (num_buckets - max_exact)).long() - rel_pos_large = torch.min( - rel_pos_large, torch.full_like(rel_pos_large, num_buckets - 1)) + rel_pos_large = max_exact + (torch.log(rel_pos.float() / max_exact) / math.log(self.max_dist / max_exact) * (num_buckets - max_exact)).long() + rel_pos_large = torch.min(rel_pos_large, torch.full_like(rel_pos_large, num_buckets - 1)) rel_buckets += torch.where(rel_pos < max_exact, rel_pos, rel_pos_large) return rel_buckets class T5Encoder(nn.Module): - - def __init__(self, - vocab, - dim, - dim_attn, - dim_ffn, - num_heads, - num_layers, - num_buckets, - shared_pos=True, - dropout=0.1): + def __init__(self, vocab, dim, dim_attn, dim_ffn, num_heads, num_layers, num_buckets, shared_pos=True, dropout=0.1): super(T5Encoder, self).__init__() self.dim = dim self.dim_attn = dim_attn @@ -286,15 +236,10 @@ def __init__(self, self.shared_pos = shared_pos # layers - self.token_embedding = vocab if isinstance(vocab, nn.Embedding) \ - else nn.Embedding(vocab, dim) - self.pos_embedding = T5RelativeEmbedding( - num_buckets, num_heads, bidirectional=True) if shared_pos else None + self.token_embedding = vocab if isinstance(vocab, nn.Embedding) else nn.Embedding(vocab, dim) + self.pos_embedding = T5RelativeEmbedding(num_buckets, num_heads, bidirectional=True) if shared_pos else None self.dropout = nn.Dropout(dropout) - self.blocks = nn.ModuleList([ - T5SelfAttention(dim, 
dim_attn, dim_ffn, num_heads, num_buckets, - shared_pos, dropout) for _ in range(num_layers) - ]) + self.blocks = nn.ModuleList([T5SelfAttention(dim, dim_attn, dim_ffn, num_heads, num_buckets, shared_pos, dropout) for _ in range(num_layers)]) self.norm = T5LayerNorm(dim) # initialize weights @@ -303,8 +248,7 @@ def __init__(self, def forward(self, ids, mask=None): x = self.token_embedding(ids) x = self.dropout(x) - e = self.pos_embedding(x.size(1), - x.size(1)) if self.shared_pos else None + e = self.pos_embedding(x.size(1), x.size(1)) if self.shared_pos else None for block in self.blocks: x = block(x, mask, pos_bias=e) x = self.norm(x) @@ -313,17 +257,7 @@ def forward(self, ids, mask=None): class T5Decoder(nn.Module): - - def __init__(self, - vocab, - dim, - dim_attn, - dim_ffn, - num_heads, - num_layers, - num_buckets, - shared_pos=True, - dropout=0.1): + def __init__(self, vocab, dim, dim_attn, dim_ffn, num_heads, num_layers, num_buckets, shared_pos=True, dropout=0.1): super(T5Decoder, self).__init__() self.dim = dim self.dim_attn = dim_attn @@ -334,15 +268,10 @@ def __init__(self, self.shared_pos = shared_pos # layers - self.token_embedding = vocab if isinstance(vocab, nn.Embedding) \ - else nn.Embedding(vocab, dim) - self.pos_embedding = T5RelativeEmbedding( - num_buckets, num_heads, bidirectional=False) if shared_pos else None + self.token_embedding = vocab if isinstance(vocab, nn.Embedding) else nn.Embedding(vocab, dim) + self.pos_embedding = T5RelativeEmbedding(num_buckets, num_heads, bidirectional=False) if shared_pos else None self.dropout = nn.Dropout(dropout) - self.blocks = nn.ModuleList([ - T5CrossAttention(dim, dim_attn, dim_ffn, num_heads, num_buckets, - shared_pos, dropout) for _ in range(num_layers) - ]) + self.blocks = nn.ModuleList([T5CrossAttention(dim, dim_attn, dim_ffn, num_heads, num_buckets, shared_pos, dropout) for _ in range(num_layers)]) self.norm = T5LayerNorm(dim) # initialize weights @@ -360,8 +289,7 @@ def forward(self, ids, mask=None, encoder_states=None, encoder_mask=None): # layers x = self.token_embedding(ids) x = self.dropout(x) - e = self.pos_embedding(x.size(1), - x.size(1)) if self.shared_pos else None + e = self.pos_embedding(x.size(1), x.size(1)) if self.shared_pos else None for block in self.blocks: x = block(x, mask, encoder_states, encoder_mask, pos_bias=e) x = self.norm(x) @@ -370,18 +298,7 @@ def forward(self, ids, mask=None, encoder_states=None, encoder_mask=None): class T5Model(nn.Module): - - def __init__(self, - vocab_size, - dim, - dim_attn, - dim_ffn, - num_heads, - encoder_layers, - decoder_layers, - num_buckets, - shared_pos=True, - dropout=0.1): + def __init__(self, vocab_size, dim, dim_attn, dim_ffn, num_heads, encoder_layers, decoder_layers, num_buckets, shared_pos=True, dropout=0.1): super(T5Model, self).__init__() self.vocab_size = vocab_size self.dim = dim @@ -394,12 +311,8 @@ def __init__(self, # layers self.token_embedding = nn.Embedding(vocab_size, dim) - self.encoder = T5Encoder(self.token_embedding, dim, dim_attn, dim_ffn, - num_heads, encoder_layers, num_buckets, - shared_pos, dropout) - self.decoder = T5Decoder(self.token_embedding, dim, dim_attn, dim_ffn, - num_heads, decoder_layers, num_buckets, - shared_pos, dropout) + self.encoder = T5Encoder(self.token_embedding, dim, dim_attn, dim_ffn, num_heads, encoder_layers, num_buckets, shared_pos, dropout) + self.decoder = T5Decoder(self.token_embedding, dim, dim_attn, dim_ffn, num_heads, decoder_layers, num_buckets, shared_pos, dropout) self.head = nn.Linear(dim, vocab_size, 
bias=False) # initialize weights @@ -412,28 +325,21 @@ def forward(self, encoder_ids, encoder_mask, decoder_ids, decoder_mask): return x -def _t5(name, - encoder_only=False, - decoder_only=False, - return_tokenizer=False, - tokenizer_kwargs={}, - dtype=torch.float32, - device='cpu', - **kwargs): +def _t5(name, encoder_only=False, decoder_only=False, return_tokenizer=False, tokenizer_kwargs={}, dtype=torch.float32, device="cpu", **kwargs): # sanity check assert not (encoder_only and decoder_only) # params if encoder_only: model_cls = T5Encoder - kwargs['vocab'] = kwargs.pop('vocab_size') - kwargs['num_layers'] = kwargs.pop('encoder_layers') - _ = kwargs.pop('decoder_layers') + kwargs["vocab"] = kwargs.pop("vocab_size") + kwargs["num_layers"] = kwargs.pop("encoder_layers") + _ = kwargs.pop("decoder_layers") elif decoder_only: model_cls = T5Decoder - kwargs['vocab'] = kwargs.pop('vocab_size') - kwargs['num_layers'] = kwargs.pop('decoder_layers') - _ = kwargs.pop('encoder_layers') + kwargs["vocab"] = kwargs.pop("vocab_size") + kwargs["num_layers"] = kwargs.pop("decoder_layers") + _ = kwargs.pop("encoder_layers") else: model_cls = T5Model @@ -447,30 +353,20 @@ def _t5(name, # init tokenizer if return_tokenizer: from .tokenizers import HuggingfaceTokenizer - tokenizer = HuggingfaceTokenizer(f'google/{name}', **tokenizer_kwargs) + + tokenizer = HuggingfaceTokenizer(f"google/{name}", **tokenizer_kwargs) return model, tokenizer else: return model def umt5_xxl(**kwargs): - cfg = dict( - vocab_size=256384, - dim=4096, - dim_attn=4096, - dim_ffn=10240, - num_heads=64, - encoder_layers=24, - decoder_layers=24, - num_buckets=32, - shared_pos=False, - dropout=0.1) + cfg = dict(vocab_size=256384, dim=4096, dim_attn=4096, dim_ffn=10240, num_heads=64, encoder_layers=24, decoder_layers=24, num_buckets=32, shared_pos=False, dropout=0.1) cfg.update(**kwargs) - return _t5('umt5-xxl', **cfg) + return _t5("umt5-xxl", **cfg) class T5EncoderModel: - def __init__( self, text_len, @@ -487,25 +383,19 @@ def __init__( self.tokenizer_path = tokenizer_path # init model - model = umt5_xxl( - encoder_only=True, - return_tokenizer=False, - dtype=dtype, - device=device).eval().requires_grad_(False) - logging.info(f'loading {checkpoint_path}') - model.load_state_dict(torch.load(checkpoint_path, map_location='cpu')) + model = umt5_xxl(encoder_only=True, return_tokenizer=False, dtype=dtype, device=device).eval().requires_grad_(False) + logging.info(f"loading {checkpoint_path}") + model.load_state_dict(torch.load(checkpoint_path, map_location="cpu")) self.model = model if shard_fn is not None: self.model = shard_fn(self.model, sync_module_states=False) else: self.model.to(self.device) # init tokenizer - self.tokenizer = HuggingfaceTokenizer( - name=tokenizer_path, seq_len=text_len, clean='whitespace') + self.tokenizer = HuggingfaceTokenizer(name=tokenizer_path, seq_len=text_len, clean="whitespace") def __call__(self, texts, device): - ids, mask = self.tokenizer( - texts, return_mask=True, add_special_tokens=True) + ids, mask = self.tokenizer(texts, return_mask=True, add_special_tokens=True) ids = ids.to(device) mask = mask.to(device) seq_lens = mask.gt(0).sum(dim=1).long() diff --git a/lightx2v/models/networks/motus/wan/tokenizers.py b/lightx2v/models/networks/motus/wan/tokenizers.py index e9e167b05..36f72caa7 100644 --- a/lightx2v/models/networks/motus/wan/tokenizers.py +++ b/lightx2v/models/networks/motus/wan/tokenizers.py @@ -20,9 +20,7 @@ def whitespace_clean(text): def canonicalize(text, keep_punctuation_exact_string=None): 
text = text.replace("_", " ") if keep_punctuation_exact_string: - text = keep_punctuation_exact_string.join( - part.translate(str.maketrans("", "", string.punctuation)) for part in text.split(keep_punctuation_exact_string) - ) + text = keep_punctuation_exact_string.join(part.translate(str.maketrans("", "", string.punctuation)) for part in text.split(keep_punctuation_exact_string)) else: text = text.translate(str.maketrans("", "", string.punctuation)) text = text.lower() diff --git a/lightx2v/models/networks/motus/wan/vae2_2.py b/lightx2v/models/networks/motus/wan/vae2_2.py index b705ef66f..9bfaf32a4 100644 --- a/lightx2v/models/networks/motus/wan/vae2_2.py +++ b/lightx2v/models/networks/motus/wan/vae2_2.py @@ -43,7 +43,6 @@ def forward(self, x, cache_x=None): class RMS_norm(nn.Module): - def __init__(self, dim, channel_first=True, images=True, bias=False): super().__init__() broadcastable_dims = (1, 1, 1) if not images else (1, 1) @@ -55,12 +54,10 @@ def __init__(self, dim, channel_first=True, images=True, bias=False): self.bias = nn.Parameter(torch.zeros(shape)) if bias else 0.0 def forward(self, x): - return (F.normalize(x, dim=(1 if self.channel_first else -1)) * - self.scale * self.gamma + self.bias) + return F.normalize(x, dim=(1 if self.channel_first else -1)) * self.scale * self.gamma + self.bias class Upsample(nn.Upsample): - def forward(self, x): """ Fix bfloat16 support for nearest neighbor interpolation. @@ -69,7 +66,6 @@ def forward(self, x): class Resample(nn.Module): - def __init__(self, dim, mode): assert mode in ( "none", @@ -94,18 +90,12 @@ def __init__(self, dim, mode): nn.Conv2d(dim, dim, 3, padding=1), # nn.Conv2d(dim, dim//2, 3, padding=1) ) - self.time_conv = CausalConv3d( - dim, dim * 2, (3, 1, 1), padding=(1, 0, 0)) + self.time_conv = CausalConv3d(dim, dim * 2, (3, 1, 1), padding=(1, 0, 0)) elif mode == "downsample2d": - self.resample = nn.Sequential( - nn.ZeroPad2d((0, 1, 0, 1)), - nn.Conv2d(dim, dim, 3, stride=(2, 2))) + self.resample = nn.Sequential(nn.ZeroPad2d((0, 1, 0, 1)), nn.Conv2d(dim, dim, 3, stride=(2, 2))) elif mode == "downsample3d": - self.resample = nn.Sequential( - nn.ZeroPad2d((0, 1, 0, 1)), - nn.Conv2d(dim, dim, 3, stride=(2, 2))) - self.time_conv = CausalConv3d( - dim, dim, (3, 1, 1), stride=(2, 1, 1), padding=(0, 0, 0)) + self.resample = nn.Sequential(nn.ZeroPad2d((0, 1, 0, 1)), nn.Conv2d(dim, dim, 3, stride=(2, 2))) + self.time_conv = CausalConv3d(dim, dim, (3, 1, 1), stride=(2, 1, 1), padding=(0, 0, 0)) else: self.resample = nn.Identity() @@ -119,24 +109,18 @@ def forward(self, x, feat_cache=None, feat_idx=[0]): feat_idx[0] += 1 else: cache_x = x[:, :, -CACHE_T:, :, :].clone() - if (cache_x.shape[2] < 2 and feat_cache[idx] is not None and - feat_cache[idx] != "Rep"): + if cache_x.shape[2] < 2 and feat_cache[idx] is not None and feat_cache[idx] != "Rep": # cache last frame of last two chunk cache_x = torch.cat( [ - feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to( - cache_x.device), + feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(cache_x.device), cache_x, ], dim=2, ) - if (cache_x.shape[2] < 2 and feat_cache[idx] is not None and - feat_cache[idx] == "Rep"): + if cache_x.shape[2] < 2 and feat_cache[idx] is not None and feat_cache[idx] == "Rep": cache_x = torch.cat( - [ - torch.zeros_like(cache_x).to(cache_x.device), - cache_x - ], + [torch.zeros_like(cache_x).to(cache_x.device), cache_x], dim=2, ) if feat_cache[idx] == "Rep": @@ -146,8 +130,7 @@ def forward(self, x, feat_cache=None, feat_idx=[0]): feat_cache[idx] = cache_x feat_idx[0] += 1 x = 
x.reshape(b, 2, c, t, h, w) - x = torch.stack((x[:, 0, :, :, :, :], x[:, 1, :, :, :, :]), - 3) + x = torch.stack((x[:, 0, :, :, :, :], x[:, 1, :, :, :, :]), 3) x = x.reshape(b, c, t * 2, h, w) t = x.shape[2] x = rearrange(x, "b c t h w -> (b t) c h w") @@ -162,8 +145,7 @@ def forward(self, x, feat_cache=None, feat_idx=[0]): feat_idx[0] += 1 else: cache_x = x[:, :, -1:, :, :].clone() - x = self.time_conv( - torch.cat([feat_cache[idx][:, :, -1:, :, :], x], 2)) + x = self.time_conv(torch.cat([feat_cache[idx][:, :, -1:, :, :], x], 2)) feat_cache[idx] = cache_x feat_idx[0] += 1 return x @@ -184,14 +166,13 @@ def init_weight2(self, conv): nn.init.zeros_(conv_weight) c1, c2, t, h, w = conv_weight.size() init_matrix = torch.eye(c1 // 2, c2) - conv_weight[:c1 // 2, :, -1, 0, 0] = init_matrix - conv_weight[c1 // 2:, :, -1, 0, 0] = init_matrix + conv_weight[: c1 // 2, :, -1, 0, 0] = init_matrix + conv_weight[c1 // 2 :, :, -1, 0, 0] = init_matrix conv.weight = nn.Parameter(conv_weight) nn.init.zeros_(conv.bias.data) class ResidualBlock(nn.Module): - def __init__(self, in_dim, out_dim, dropout=0.0): super().__init__() self.in_dim = in_dim @@ -207,9 +188,7 @@ def __init__(self, in_dim, out_dim, dropout=0.0): nn.Dropout(dropout), CausalConv3d(out_dim, out_dim, 3, padding=1), ) - self.shortcut = ( - CausalConv3d(in_dim, out_dim, 1) - if in_dim != out_dim else nn.Identity()) + self.shortcut = CausalConv3d(in_dim, out_dim, 1) if in_dim != out_dim else nn.Identity() def forward(self, x, feat_cache=None, feat_idx=[0]): h = self.shortcut(x) @@ -221,8 +200,7 @@ def forward(self, x, feat_cache=None, feat_idx=[0]): # cache last frame of last two chunk cache_x = torch.cat( [ - feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to( - cache_x.device), + feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(cache_x.device), cache_x, ], dim=2, @@ -258,10 +236,7 @@ def forward(self, x): x = rearrange(x, "b c t h w -> (b t) c h w") x = self.norm(x) # compute query, key, value - q, k, v = ( - self.to_qkv(x).reshape(b * t, 1, c * 3, - -1).permute(0, 1, 3, - 2).contiguous().chunk(3, dim=-1)) + q, k, v = self.to_qkv(x).reshape(b * t, 1, c * 3, -1).permute(0, 1, 3, 2).contiguous().chunk(3, dim=-1) # apply attention x = F.scaled_dot_product_attention( @@ -281,8 +256,7 @@ def patchify(x, patch_size): if patch_size == 1: return x if x.dim() == 4: - x = rearrange( - x, "b c (h q) (w r) -> b (c r q) h w", q=patch_size, r=patch_size) + x = rearrange(x, "b c (h q) (w r) -> b (c r q) h w", q=patch_size, r=patch_size) elif x.dim() == 5: x = rearrange( x, @@ -301,8 +275,7 @@ def unpatchify(x, patch_size): return x if x.dim() == 4: - x = rearrange( - x, "b (c r q) h w -> b c (h q) (w r)", q=patch_size, r=patch_size) + x = rearrange(x, "b (c r q) h w -> b c (h q) (w r)", q=patch_size, r=patch_size) elif x.dim() == 5: x = rearrange( x, @@ -314,7 +287,6 @@ def unpatchify(x, patch_size): class AvgDown3D(nn.Module): - def __init__( self, in_channels, @@ -368,7 +340,6 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class DupUp3D(nn.Module): - def __init__( self, in_channels: int, @@ -408,19 +379,12 @@ def forward(self, x: torch.Tensor, first_chunk=False) -> torch.Tensor: x.size(6) * self.factor_s, ) if first_chunk: - x = x[:, :, self.factor_t - 1:, :, :] + x = x[:, :, self.factor_t - 1 :, :, :] return x class Down_ResidualBlock(nn.Module): - - def __init__(self, - in_dim, - out_dim, - dropout, - mult, - temperal_downsample=False, - down_flag=False): + def __init__(self, in_dim, out_dim, dropout, mult, temperal_downsample=False, down_flag=False): 
super().__init__() # Shortcut path with downsample @@ -453,14 +417,7 @@ def forward(self, x, feat_cache=None, feat_idx=[0]): class Up_ResidualBlock(nn.Module): - - def __init__(self, - in_dim, - out_dim, - dropout, - mult, - temperal_upsample=False, - up_flag=False): + def __init__(self, in_dim, out_dim, dropout, mult, temperal_upsample=False, up_flag=False): super().__init__() # Shortcut path with upsample if up_flag: @@ -498,7 +455,6 @@ def forward(self, x, feat_cache=None, feat_idx=[0], first_chunk=False): class Encoder3d(nn.Module): - def __init__( self, dim=128, @@ -527,9 +483,7 @@ def __init__( # downsample blocks downsamples = [] for i, (in_dim, out_dim) in enumerate(zip(dims[:-1], dims[1:])): - t_down_flag = ( - temperal_downsample[i] - if i < len(temperal_downsample) else False) + t_down_flag = temperal_downsample[i] if i < len(temperal_downsample) else False downsamples.append( Down_ResidualBlock( in_dim=in_dim, @@ -538,7 +492,8 @@ def __init__( mult=num_res_blocks, temperal_downsample=t_down_flag, down_flag=i != len(dim_mult) - 1, - )) + ) + ) scale /= 2.0 self.downsamples = nn.Sequential(*downsamples) @@ -557,15 +512,13 @@ def __init__( ) def forward(self, x, feat_cache=None, feat_idx=[0]): - if feat_cache is not None: idx = feat_idx[0] cache_x = x[:, :, -CACHE_T:, :, :].clone() if cache_x.shape[2] < 2 and feat_cache[idx] is not None: cache_x = torch.cat( [ - feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to( - cache_x.device), + feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(cache_x.device), cache_x, ], dim=2, @@ -598,8 +551,7 @@ def forward(self, x, feat_cache=None, feat_idx=[0]): if cache_x.shape[2] < 2 and feat_cache[idx] is not None: cache_x = torch.cat( [ - feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to( - cache_x.device), + feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(cache_x.device), cache_x, ], dim=2, @@ -614,7 +566,6 @@ def forward(self, x, feat_cache=None, feat_idx=[0]): class Decoder3d(nn.Module): - def __init__( self, dim=128, @@ -635,7 +586,7 @@ def __init__( # dimensions dims = [dim * u for u in [dim_mult[-1]] + dim_mult[::-1]] - scale = 1.0 / 2**(len(dim_mult) - 2) + scale = 1.0 / 2 ** (len(dim_mult) - 2) # init block self.conv1 = CausalConv3d(z_dim, dims[0], 3, padding=1) @@ -649,8 +600,7 @@ def __init__( # upsample blocks upsamples = [] for i, (in_dim, out_dim) in enumerate(zip(dims[:-1], dims[1:])): - t_up_flag = temperal_upsample[i] if i < len( - temperal_upsample) else False + t_up_flag = temperal_upsample[i] if i < len(temperal_upsample) else False upsamples.append( Up_ResidualBlock( in_dim=in_dim, @@ -659,7 +609,8 @@ def __init__( mult=num_res_blocks + 1, temperal_upsample=t_up_flag, up_flag=i != len(dim_mult) - 1, - )) + ) + ) self.upsamples = nn.Sequential(*upsamples) # output blocks @@ -676,8 +627,7 @@ def forward(self, x, feat_cache=None, feat_idx=[0], first_chunk=False): if cache_x.shape[2] < 2 and feat_cache[idx] is not None: cache_x = torch.cat( [ - feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to( - cache_x.device), + feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(cache_x.device), cache_x, ], dim=2, @@ -709,8 +659,7 @@ def forward(self, x, feat_cache=None, feat_idx=[0], first_chunk=False): if cache_x.shape[2] < 2 and feat_cache[idx] is not None: cache_x = torch.cat( [ - feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to( - cache_x.device), + feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(cache_x.device), cache_x, ], dim=2, @@ -732,7 +681,6 @@ def count_conv3d(model): class WanVAE_(nn.Module): - def __init__( self, dim=160, @@ -795,15 +743,14 @@ 
def encode(self, x, scale): ) else: out_ = self.encoder( - x[:, :, 1 + 4 * (i - 1):1 + 4 * i, :, :], + x[:, :, 1 + 4 * (i - 1) : 1 + 4 * i, :, :], feat_cache=self._enc_feat_map, feat_idx=self._enc_conv_idx, ) out = torch.cat([out, out_], 2) mu, log_var = self.conv1(out).chunk(2, dim=1) if isinstance(scale[0], torch.Tensor): - mu = (mu - scale[0].view(1, self.z_dim, 1, 1, 1)) * scale[1].view( - 1, self.z_dim, 1, 1, 1) + mu = (mu - scale[0].view(1, self.z_dim, 1, 1, 1)) * scale[1].view(1, self.z_dim, 1, 1, 1) else: mu = (mu - scale[0]) * scale[1] self.clear_cache() @@ -812,8 +759,7 @@ def encode(self, x, scale): def decode(self, z, scale): self.clear_cache() if isinstance(scale[0], torch.Tensor): - z = z / scale[1].view(1, self.z_dim, 1, 1, 1) + scale[0].view( - 1, self.z_dim, 1, 1, 1) + z = z / scale[1].view(1, self.z_dim, 1, 1, 1) + scale[0].view(1, self.z_dim, 1, 1, 1) else: z = z / scale[1] + scale[0] iter_ = z.shape[2] @@ -822,14 +768,14 @@ def decode(self, z, scale): self._conv_idx = [0] if i == 0: out = self.decoder( - x[:, :, i:i + 1, :, :], + x[:, :, i : i + 1, :, :], feat_cache=self._feat_map, feat_idx=self._conv_idx, first_chunk=True, ) else: out_ = self.decoder( - x[:, :, i:i + 1, :, :], + x[:, :, i : i + 1, :, :], feat_cache=self._feat_map, feat_idx=self._conv_idx, ) @@ -879,14 +825,12 @@ def _video_vae(pretrained_path=None, z_dim=16, dim=160, device="cpu", **kwargs): # load checkpoint logging.info(f"loading {pretrained_path}") - model.load_state_dict( - torch.load(pretrained_path, map_location=device), assign=True) + model.load_state_dict(torch.load(pretrained_path, map_location=device), assign=True) return model class Wan2_2_VAE: - def __init__( self, z_dim=48, @@ -897,7 +841,6 @@ def __init__( dtype=torch.float, device="cuda", ): - self.dtype = dtype self.device = device @@ -1019,7 +962,11 @@ def __init__( dim=c_dim, dim_mult=dim_mult, temperal_downsample=temperal_downsample, - ).eval().requires_grad_(False).to(device)) + ) + .eval() + .requires_grad_(False) + .to(device) + ) def encode(self, videos): with torch.amp.autocast("cuda", dtype=self.dtype): @@ -1030,12 +977,9 @@ def decode(self, zs): if not isinstance(zs, list): raise TypeError("zs should be a list") with amp.autocast(dtype=self.dtype): - return [ - self.model.decode(u.unsqueeze(0), - self.scale).float().clamp_(-1, - 1).squeeze(0) - for u in zs - ] + return [self.model.decode(u.unsqueeze(0), self.scale).float().clamp_(-1, 1).squeeze(0) for u in zs] + # TODO: maybe can speed up with batch + # return self.model.decode(video_latents, self.scale).float().clamp(-1, 1) except TypeError as e: logging.info(e) return None diff --git a/lightx2v/models/networks/motus/wan_model.py b/lightx2v/models/networks/motus/wan_model.py index 2ea6d36b1..7a63ba5bb 100644 --- a/lightx2v/models/networks/motus/wan_model.py +++ b/lightx2v/models/networks/motus/wan_model.py @@ -1,12 +1,11 @@ import json import logging import os -from typing import Any, Dict, List, Optional +from typing import Any, Dict, Optional import torch import torch.nn as nn -from lightx2v.models.networks.motus.primitives import sinusoidal_embedding_1d from lightx2v.models.networks.motus.wan.model import WanModel from lightx2v.models.networks.motus.wan.vae2_2 import Wan2_2_VAE @@ -42,6 +41,8 @@ def encode_video(self, video_pixels: torch.Tensor) -> torch.Tensor: def decode_video(self, video_latents: torch.Tensor) -> torch.Tensor: with torch.no_grad(): return torch.stack([self.vae.decode([video_latents[i]])[0] for i in range(video_latents.shape[0])], dim=0) + # TODO: maybe 
can speed up with batch to tensor + # return self.vae.model.decode(video_latents, self.vae.scale).float().clamp(-1, 1) @@ classmethod def from_config(cls, config_path: str, vae_path: str, device: str = "cuda", precision: str = "bfloat16"): diff --git a/lightx2v/models/runners/motus/__init__.py b/lightx2v/models/runners/motus/__init__.py index 8b1378917..e69de29bb 100644 --- a/lightx2v/models/runners/motus/__init__.py +++ b/lightx2v/models/runners/motus/__init__.py @@ -1 +0,0 @@ - diff --git a/lightx2v/models/runners/motus/motus_runner.py b/lightx2v/models/runners/motus/motus_runner.py index 022e9d4bd..384623dc0 100644 --- a/lightx2v/models/runners/motus/motus_runner.py +++ b/lightx2v/models/runners/motus/motus_runner.py @@ -7,10 +7,10 @@ from lightx2v.models.networks.motus.model import MotusModel from lightx2v.models.runners.base_runner import BaseRunner -from lightx2v.utils.registry_factory import RUNNER_REGISTER -from lightx2v.utils.utils import save_to_video from lightx2v.server.metrics import monitor_cli from lightx2v.utils.profiler import * +from lightx2v.utils.registry_factory import RUNNER_REGISTER +from lightx2v.utils.utils import save_to_video from lightx2v_platform.base.global_var import AI_DEVICE @@ -92,4 +92,3 @@ def run_pipeline(self, input_info): num_inference_steps=int(self.config.get("num_inference_steps", 10)), ) self._save_outputs(pred_frames, pred_actions) - From e70e00bd4efa9de8d9466d2d5416dc89244aaf47 Mon Sep 17 00:00:00 2001 From: zowiezhang Date: Wed, 15 Apr 2026 12:38:05 +0800 Subject: [PATCH 4/4] Update motus transformer_infer / ops to use wan features --- lightx2v/models/networks/motus/core.py | 16 ++- .../models/networks/motus/infer/module_io.py | 4 +- .../models/networks/motus/infer/pre_infer.py | 10 +- .../networks/motus/infer/transformer_infer.py | 135 +++++++++++++----- lightx2v/models/networks/motus/model.py | 35 ++++- lightx2v/models/networks/motus/t5.py | 2 +- .../models/networks/motus/wan/__init__.py | 6 +- lightx2v/models/networks/motus/wan_model.py | 16 ++- lightx2v/models/schedulers/motus/scheduler.py | 9 ++ 9 files changed, 183 insertions(+), 50 deletions(-) diff --git a/lightx2v/models/networks/motus/core.py b/lightx2v/models/networks/motus/core.py index 04670f8dd..477beb79e 100644 --- a/lightx2v/models/networks/motus/core.py +++ b/lightx2v/models/networks/motus/core.py @@ -7,14 +7,24 @@ import torch.nn as nn from transformers import AutoConfig, Qwen3VLForConditionalGeneration +from lightx2v.models.networks.wan.infer.triton_ops import fuse_scale_shift_kernel +from lightx2v.models.networks.wan.infer.utils import sinusoidal_embedding_1d + from .action_expert import ActionExpert, ActionExpertConfig -from .primitives import sinusoidal_embedding_1d from .und_expert import UndExpert, UndExpertConfig from .wan_model import WanVideoModel logger = logging.getLogger(__name__) +def _apply_modulation(x: torch.Tensor, scale: torch.Tensor, shift: torch.Tensor) -> torch.Tensor: + scale = scale.squeeze(2) + shift = shift.squeeze(2) + if x.is_cuda: + return fuse_scale_shift_kernel(x.float().contiguous(), scale.contiguous(), shift.contiguous()) + return x.float() * (1 + scale) + shift + + @dataclass class MotusConfig: wan_checkpoint_path: str @@ -89,7 +99,7 @@ def compute_adaln_modulation(self, video_adaln_params: torch.Tensor, layer_idx: def process_ffn(self, video_tokens: torch.Tensor, video_adaln_modulation: tuple, layer_idx: int) -> torch.Tensor: wan_layer = self.video_model.wan_model.blocks[layer_idx] v_mod = video_adaln_modulation - ffn_input = 
wan_layer.norm2(video_tokens).float() * (1 + v_mod[4].squeeze(2)) + v_mod[3].squeeze(2) + ffn_input = _apply_modulation(wan_layer.norm2(video_tokens), v_mod[4], v_mod[3]) ffn_out = wan_layer.ffn(ffn_input) with torch.amp.autocast("cuda", dtype=torch.float32): return video_tokens + ffn_out * v_mod[5].squeeze(2) @@ -223,7 +233,7 @@ def compute_adaln_modulation(self, action_adaln_params: torch.Tensor, layer_idx: def process_ffn(self, action_tokens: torch.Tensor, action_adaln_modulation: tuple, layer_idx: int) -> torch.Tensor: action_block = self.action_expert.blocks[layer_idx] a_mod = action_adaln_modulation - ffn_input = action_block.norm2(action_tokens).float() * (1 + a_mod[4].squeeze(2)) + a_mod[3].squeeze(2) + ffn_input = _apply_modulation(action_block.norm2(action_tokens), a_mod[4], a_mod[3]) ffn_out = action_block.ffn(ffn_input) with torch.amp.autocast("cuda", dtype=torch.float32): return action_tokens + ffn_out * a_mod[5].squeeze(2) diff --git a/lightx2v/models/networks/motus/infer/module_io.py b/lightx2v/models/networks/motus/infer/module_io.py index 6f2df77ac..6f3818718 100644 --- a/lightx2v/models/networks/motus/infer/module_io.py +++ b/lightx2v/models/networks/motus/infer/module_io.py @@ -3,6 +3,8 @@ import torch +from lightx2v.models.networks.wan.infer.module_io import GridOutput + @dataclass class MotusPreInferModuleOutput: @@ -15,7 +17,7 @@ class MotusPreInferModuleOutput: image_context: torch.Tensor | None und_tokens: torch.Tensor condition_frame_latent: torch.Tensor - grid_sizes: torch.Tensor + grid_sizes: GridOutput @dataclass diff --git a/lightx2v/models/networks/motus/infer/pre_infer.py b/lightx2v/models/networks/motus/infer/pre_infer.py index ab5cfd1c0..961f3ac9e 100644 --- a/lightx2v/models/networks/motus/infer/pre_infer.py +++ b/lightx2v/models/networks/motus/infer/pre_infer.py @@ -1,5 +1,7 @@ import torch +from lightx2v.models.networks.wan.infer.module_io import GridOutput + from .module_io import MotusPreInferModuleOutput @@ -27,6 +29,9 @@ def infer(self, image_path: str, prompt: str, state_value, seed: int | None = No und_tokens = self.adapter.model.und_module.extract_und_features(vlm_inputs) image_context = self.adapter.model.und_module.extract_image_context(vlm_inputs) + batch_size = state.shape[0] + grid_sizes = self.adapter.model.grid_sizes[:batch_size] + self.scheduler.prepare( seed=seed, condition_frame_latent=condition_frame_latent, @@ -45,5 +50,8 @@ def infer(self, image_path: str, prompt: str, state_value, seed: int | None = No image_context=image_context, und_tokens=und_tokens, condition_frame_latent=condition_frame_latent, - grid_sizes=self.adapter.model.grid_sizes[: state.shape[0]], + grid_sizes=GridOutput( + tensor=grid_sizes, + tuple=tuple(int(v) for v in grid_sizes[0].tolist()), + ), ) diff --git a/lightx2v/models/networks/motus/infer/transformer_infer.py b/lightx2v/models/networks/motus/infer/transformer_infer.py index ada74ba79..f7c6fb064 100644 --- a/lightx2v/models/networks/motus/infer/transformer_infer.py +++ b/lightx2v/models/networks/motus/infer/transformer_infer.py @@ -1,6 +1,8 @@ import torch from lightx2v.common.transformer_infer.transformer_infer import BaseTransformerInfer +from lightx2v.models.networks.wan.infer.triton_ops import fuse_scale_shift_kernel +from lightx2v.common.ops.norm.triton_ops import apply_rotary_embedding from ..ops import RegistryAttention @@ -12,20 +14,68 @@ def __init__(self, adapter, config): self.self_attn_1_type = config.get("self_attn_1_type", config.get("attention_type", "flash_attn2")) self.self_attn_2_type = 
config.get("self_attn_2_type", config.get("attention_type", "flash_attn2")) self.cross_attn_1_type = config.get("cross_attn_1_type", config.get("attention_type", "flash_attn2")) + self.modulate_type = config.get("modulate_type", "triton") + self.rope_type = config.get("rope_type", "triton") self.self_attn = RegistryAttention(self.self_attn_1_type) self.joint_self_attn = RegistryAttention(self.self_attn_2_type) self.cross_attn = RegistryAttention(self.cross_attn_1_type) + def _get_wan_layer(self, layer_idx): + return self.adapter.model.video_module.video_model.wan_model.blocks[layer_idx] + + def _get_action_block(self, layer_idx): + return self.adapter.model.action_expert.blocks[layer_idx] + + def _get_und_block(self, layer_idx): + return self.adapter.model.und_expert.blocks[layer_idx] + + def _modulate(self, x, modulation): + scale = modulation[1].squeeze(2) + shift = modulation[0].squeeze(2) + x = x.float().contiguous() + if self.modulate_type == "triton" and x.is_cuda: + return fuse_scale_shift_kernel(x, scale.contiguous(), shift.contiguous()) + return x * (1 + scale) + shift + + def _video_self_attention_qkv(self, video_tokens, pre_infer_out, layer_idx): + model = self.adapter.model + wan_layer = self._get_wan_layer(layer_idx) + batch, seq_len, dim = video_tokens.shape + num_heads = model.video_model.wan_model.num_heads + head_dim = dim // num_heads + + video_q = wan_layer.self_attn.norm_q(wan_layer.self_attn.q(video_tokens)).view(batch, seq_len, num_heads, head_dim) + video_k = wan_layer.self_attn.norm_k(wan_layer.self_attn.k(video_tokens)).view(batch, seq_len, num_heads, head_dim) + video_v = wan_layer.self_attn.v(video_tokens).view(batch, seq_len, num_heads, head_dim) + video_q, video_k = self._apply_video_rope(video_q, video_k, pre_infer_out) + return video_q, video_k, video_v + + def _apply_video_rope(self, video_q, video_k, pre_infer_out): + grid_sizes = pre_infer_out.grid_sizes.tensor if hasattr(pre_infer_out.grid_sizes, "tensor") else pre_infer_out.grid_sizes + if self.rope_type == "triton" and video_q.is_cuda and video_q.dtype == torch.bfloat16 and video_k.dtype == torch.bfloat16: + q_out = video_q.clone() + k_out = video_k.clone() + for batch_idx, grid_size in enumerate(grid_sizes.tolist()): + grid_size_tuple = tuple(int(v) for v in grid_size) + seq_len = grid_size_tuple[0] * grid_size_tuple[1] * grid_size_tuple[2] + cos, sin = self.adapter.get_wan_rotary_cos_sin(grid_size_tuple) + q_out[batch_idx, :seq_len] = apply_rotary_embedding(q_out[batch_idx, :seq_len].contiguous(), cos, sin) + k_out[batch_idx, :seq_len] = apply_rotary_embedding(k_out[batch_idx, :seq_len].contiguous(), cos, sin) + return q_out, k_out + + freqs = self.adapter.get_wan_freqs() + return self.adapter.rope_apply(video_q, grid_sizes, freqs), self.adapter.rope_apply(video_k, grid_sizes, freqs) + def _joint_attention(self, pre_infer_out, video_tokens, action_tokens, und_tokens, video_adaln_modulation, action_adaln_modulation, layer_idx): model = self.adapter.model - wan_layer = model.video_module.video_model.wan_model.blocks[layer_idx] - action_block = model.action_expert.blocks[layer_idx] - und_block = model.und_expert.blocks[layer_idx] + wan_layer = self._get_wan_layer(layer_idx) + action_block = self._get_action_block(layer_idx) + und_block = self._get_und_block(layer_idx) v_mod = video_adaln_modulation a_mod = action_adaln_modulation - norm_video = wan_layer.norm1(video_tokens).float() * (1 + v_mod[1].squeeze(2)) + v_mod[0].squeeze(2) - norm_action = action_block.norm1(action_tokens).float() * (1 + 
a_mod[1].squeeze(2)) + a_mod[0].squeeze(2) + norm_video = self._modulate(wan_layer.norm1(video_tokens), v_mod) + norm_action = self._modulate(action_block.norm1(action_tokens), a_mod) norm_und = und_block.norm1(und_tokens) batch, video_len, video_dim = norm_video.shape @@ -34,12 +84,7 @@ def _joint_attention(self, pre_infer_out, video_tokens, action_tokens, und_token num_heads = model.video_model.wan_model.num_heads head_dim = video_dim // num_heads - video_q = wan_layer.self_attn.norm_q(wan_layer.self_attn.q(norm_video)).view(batch, video_len, num_heads, head_dim) - video_k = wan_layer.self_attn.norm_k(wan_layer.self_attn.k(norm_video)).view(batch, video_len, num_heads, head_dim) - video_v = wan_layer.self_attn.v(norm_video).view(batch, video_len, num_heads, head_dim) - freqs = self.adapter.get_wan_freqs() - video_q = self.adapter.rope_apply(video_q, pre_infer_out.grid_sizes, freqs) - video_k = self.adapter.rope_apply(video_k, pre_infer_out.grid_sizes, freqs) + video_q, video_k, video_v = self._video_self_attention_qkv(norm_video, pre_infer_out, layer_idx) action_q, action_k, action_v = action_block.wan_action_qkv_mm(norm_action) action_q = action_block.wan_action_norm_q(action_q.flatten(-2)).view(batch, action_len, num_heads, head_dim) @@ -64,7 +109,7 @@ def _joint_attention(self, pre_infer_out, video_tokens, action_tokens, und_token return video_tokens, action_tokens, und_tokens def _cross_attention(self, video_tokens, processed_t5_context, layer_idx): - wan_layer = self.adapter.model.video_module.video_model.wan_model.blocks[layer_idx] + wan_layer = self._get_wan_layer(layer_idx) batch, q_len, dim = video_tokens.shape ctx_len = processed_t5_context.shape[1] num_heads = wan_layer.cross_attn.num_heads @@ -76,6 +121,41 @@ def _cross_attention(self, video_tokens, processed_t5_context, layer_idx): v = wan_layer.cross_attn.v(processed_t5_context).view(batch, ctx_len, num_heads, head_dim) return video_tokens + wan_layer.cross_attn.o(self.cross_attn(q, k, v)) + def _prepare_action_tokens(self, pre_infer_out, action_latents): + model = self.adapter.model + state_tokens = pre_infer_out.state.unsqueeze(1).to(model.dtype) + registers = model.action_expert.registers + if registers is not None: + registers = registers.expand(state_tokens.shape[0], -1, -1) + action_tokens = model.action_expert.input_encoder(state_tokens, action_latents, registers) + return state_tokens, action_tokens + + def _prepare_step_embeddings(self, batch_size, video_tokens, action_tokens, timestep): + model = self.adapter.model + timestep_scaled = (timestep * 1000).expand(batch_size).to(model.dtype) + video_head_time_emb, video_adaln_params = model.video_module.get_time_embedding(timestep_scaled, video_tokens.shape[1]) + action_head_time_emb, action_adaln_params = model.action_module.get_time_embedding(timestep_scaled, action_tokens.shape[1]) + return video_head_time_emb, video_adaln_params, action_head_time_emb, action_adaln_params + + def _run_layer(self, pre_infer_out, video_tokens, action_tokens, und_tokens, processed_t5_context, video_adaln_params, action_adaln_params, layer_idx): + model = self.adapter.model + video_adaln_modulation = model.video_module.compute_adaln_modulation(video_adaln_params, layer_idx) + action_adaln_modulation = model.action_module.compute_adaln_modulation(action_adaln_params, layer_idx) + video_tokens, action_tokens, und_tokens = self._joint_attention( + pre_infer_out, + video_tokens, + action_tokens, + und_tokens, + video_adaln_modulation, + action_adaln_modulation, + layer_idx, + ) + 
video_tokens = self._cross_attention(video_tokens, processed_t5_context, layer_idx) + video_tokens = model.video_module.process_ffn(video_tokens, video_adaln_modulation, layer_idx) + action_tokens = model.action_module.process_ffn(action_tokens, action_adaln_modulation, layer_idx) + und_tokens = model.und_module.process_ffn(und_tokens, layer_idx) + return video_tokens, action_tokens, und_tokens + @torch.no_grad() def infer(self, weights, pre_infer_out): model = self.adapter.model @@ -87,37 +167,28 @@ def infer(self, weights, pre_infer_out): for step_index, t, t_next, dt in scheduler.iter_steps(): scheduler.step_pre(step_index) video_tokens = model.video_module.prepare_input(scheduler.video_latents.to(model.dtype)) - state_tokens = pre_infer_out.state.unsqueeze(1).to(model.dtype) - # in case for the registers is set to 0 - registers = model.action_expert.registers - if registers is not None: - registers = registers.expand(state_tokens.shape[0], -1, -1) - action_tokens = model.action_expert.input_encoder(state_tokens, scheduler.action_latents, registers) + state_tokens, action_tokens = self._prepare_action_tokens(pre_infer_out, scheduler.action_latents) und_tokens = und_tokens_base.clone() - video_t_scaled = (t * 1000).expand(state_tokens.shape[0]).to(model.dtype) - action_t_scaled = (t * 1000).expand(state_tokens.shape[0]).to(model.dtype) - with torch.autocast(device_type="cuda", dtype=model.video_model.precision): - video_head_time_emb, video_adaln_params = model.video_module.get_time_embedding(video_t_scaled, video_tokens.shape[1]) - action_head_time_emb, action_adaln_params = model.action_module.get_time_embedding(action_t_scaled, action_tokens.shape[1]) + video_head_time_emb, video_adaln_params, action_head_time_emb, action_adaln_params = self._prepare_step_embeddings( + state_tokens.shape[0], + video_tokens, + action_tokens, + t, + ) for layer_idx in range(model.config.num_layers): - video_adaln_modulation = model.video_module.compute_adaln_modulation(video_adaln_params, layer_idx) - action_adaln_modulation = model.action_module.compute_adaln_modulation(action_adaln_params, layer_idx) - video_tokens, action_tokens, und_tokens = self._joint_attention( + video_tokens, action_tokens, und_tokens = self._run_layer( pre_infer_out, video_tokens, action_tokens, und_tokens, - video_adaln_modulation, - action_adaln_modulation, + processed_t5_context, + video_adaln_params, + action_adaln_params, layer_idx, ) - video_tokens = self._cross_attention(video_tokens, processed_t5_context, layer_idx) - video_tokens = model.video_module.process_ffn(video_tokens, video_adaln_modulation, layer_idx) - action_tokens = model.action_module.process_ffn(action_tokens, action_adaln_modulation, layer_idx) - und_tokens = model.und_module.process_ffn(und_tokens, layer_idx) video_velocity = model.video_module.apply_output_head(video_tokens, video_head_time_emb) action_pred_full = model.action_expert.decoder(action_tokens, action_head_time_emb) diff --git a/lightx2v/models/networks/motus/model.py b/lightx2v/models/networks/motus/model.py index c39098e9f..d36967182 100644 --- a/lightx2v/models/networks/motus/model.py +++ b/lightx2v/models/networks/motus/model.py @@ -17,8 +17,8 @@ from lightx2v.models.networks.motus.infer.pre_infer import MotusPreInfer from lightx2v.models.networks.motus.infer.transformer_infer import MotusTransformerInfer from lightx2v.models.networks.motus.ops import LinearWithMM, TripleQKVProjector +from lightx2v.models.input_encoders.hf.wan.t5.model import T5EncoderModel from 
lightx2v.models.networks.motus.primitives import rope_apply -from lightx2v.models.networks.motus.t5 import T5EncoderModel from lightx2v.models.schedulers.motus.scheduler import MotusScheduler @@ -42,6 +42,7 @@ def __init__(self, config, device): self.t5_encoder = self._load_t5_encoder() self.vlm_processor = self._load_vlm_processor() self._load_normalization_stats() + self._rope_cos_sin_cache = {} self._build_native_stack() def _build_native_stack(self): @@ -96,7 +97,7 @@ def _load_t5_encoder(self): return self._t5_encoder_cls( text_len=512, dtype=torch.bfloat16, - device=str(self.device), + device=self.device, checkpoint_path=os.path.join(self.config["wan_path"], "models_t5_umt5-xxl-enc-bf16.pth"), tokenizer_path=os.path.join(self.config["wan_path"], "google", "umt5-xxl"), ) @@ -233,6 +234,31 @@ def get_wan_freqs(self) -> torch.Tensor: freqs = freqs.to(self.device) return freqs + def get_wan_rotary_cos_sin(self, grid_size: tuple[int, int, int]): + if grid_size in self._rope_cos_sin_cache: + return self._rope_cos_sin_cache[grid_size] + + freqs = self.get_wan_freqs() + head_dim_half = freqs.shape[1] + c_f = head_dim_half - 2 * (head_dim_half // 3) + c_h = head_dim_half // 3 + c_w = head_dim_half // 3 + fpart, hpart, wpart = freqs.split([c_f, c_h, c_w], dim=1) + f, h, w = grid_size + + freq_grid = torch.cat( + [ + fpart[:f].view(f, 1, 1, -1).expand(f, h, w, -1), + hpart[:h].view(1, h, 1, -1).expand(f, h, w, -1), + wpart[:w].view(1, 1, w, -1).expand(f, h, w, -1), + ], + dim=-1, + ).reshape(f * h * w, -1) + + cos_sin = (freq_grid.real.contiguous(), freq_grid.imag.contiguous()) + self._rope_cos_sin_cache[grid_size] = cos_sin + return cos_sin + def prepare_frame(self, image_path: str) -> torch.Tensor: image = Image.open(image_path).convert("RGB") image_np = np.asarray(image).astype(np.float32) / 255.0 @@ -263,7 +289,10 @@ def build_instruction(self, prompt: str) -> str: return f"{prefix}{prompt}" def build_t5_embeddings(self, instruction: str): - t5_out = self.t5_encoder([instruction], str(self.device)) + if hasattr(self.t5_encoder, "infer"): + t5_out = self.t5_encoder.infer([instruction]) + else: + t5_out = self.t5_encoder([instruction], self.device) if isinstance(t5_out, torch.Tensor): return [t5_out.squeeze(0)] if t5_out.dim() == 3 else [t5_out] return t5_out diff --git a/lightx2v/models/networks/motus/t5.py b/lightx2v/models/networks/motus/t5.py index 31c4e77ef..e899c20d9 100644 --- a/lightx2v/models/networks/motus/t5.py +++ b/lightx2v/models/networks/motus/t5.py @@ -1,3 +1,3 @@ -from .wan.t5 import T5EncoderModel +from lightx2v.models.input_encoders.hf.wan.t5.model import T5EncoderModel __all__ = ["T5EncoderModel"] diff --git a/lightx2v/models/networks/motus/wan/__init__.py b/lightx2v/models/networks/motus/wan/__init__.py index 9ce1f8e18..564929b04 100644 --- a/lightx2v/models/networks/motus/wan/__init__.py +++ b/lightx2v/models/networks/motus/wan/__init__.py @@ -1,8 +1,8 @@ from .attention import flash_attention from .model import WanModel -from .t5 import T5EncoderModel -from .tokenizers import HuggingfaceTokenizer -from .vae2_2 import Wan2_2_VAE +from lightx2v.models.input_encoders.hf.wan.t5.model import T5EncoderModel +from lightx2v.models.input_encoders.hf.wan.t5.tokenizer import HuggingfaceTokenizer +from lightx2v.models.video_encoders.hf.wan.vae_2_2 import Wan2_2_VAE __all__ = [ "WanModel", diff --git a/lightx2v/models/networks/motus/wan_model.py b/lightx2v/models/networks/motus/wan_model.py index 7a63ba5bb..0eafd17b1 100644 --- a/lightx2v/models/networks/motus/wan_model.py +++ 
b/lightx2v/models/networks/motus/wan_model.py @@ -7,7 +7,7 @@ import torch.nn as nn from lightx2v.models.networks.motus.wan.model import WanModel -from lightx2v.models.networks.motus.wan.vae2_2 import Wan2_2_VAE +from lightx2v.models.video_encoders.hf.wan.vae_2_2 import Wan2_2_VAE try: from safetensors.torch import load_file as safe_load_file @@ -32,17 +32,21 @@ def __init__(self, model_config: Dict[str, Any], vae_path: str, device: str = "c self.precision = {"float32": torch.float32, "float16": torch.float16, "bfloat16": torch.bfloat16}[precision] self.wan_model = WanModel(**model_config) self.wan_model.to(device=self.device, dtype=self.precision) - self.vae = Wan2_2_VAE(vae_pth=vae_path, device=self.device) + self.vae = Wan2_2_VAE(vae_path=vae_path, dtype=self.precision, device=self.device) def encode_video(self, video_pixels: torch.Tensor) -> torch.Tensor: with torch.no_grad(): - return self.vae.encode(video_pixels) + return self.vae.encode(video_pixels.to(device=self.device, dtype=self.precision)) def decode_video(self, video_latents: torch.Tensor) -> torch.Tensor: with torch.no_grad(): - return torch.stack([self.vae.decode([video_latents[i]])[0] for i in range(video_latents.shape[0])], dim=0) - # TODO: maybe can speed up with batch to tensor - # return self.vae.model.decode(video_latents, self.vae.scale).float().clamp(-1, 1) + decoded = [] + for i in range(video_latents.shape[0]): + sample = self.vae.decode(video_latents[i].to(device=self.device, dtype=self.precision)) + if sample.dim() > 4 and sample.shape[0] == 1: + sample = sample.squeeze(0) + decoded.append(sample) + return torch.stack(decoded, dim=0) @classmethod def from_config(cls, config_path: str, vae_path: str, device: str = "cuda", precision: str = "bfloat16"): diff --git a/lightx2v/models/schedulers/motus/scheduler.py b/lightx2v/models/schedulers/motus/scheduler.py index faf198ada..ffcad2011 100644 --- a/lightx2v/models/schedulers/motus/scheduler.py +++ b/lightx2v/models/schedulers/motus/scheduler.py @@ -10,7 +10,15 @@ def __init__(self, config): self.action_latents = None self.timesteps = None + def _ensure_video_latent_5d(self, latent: torch.Tensor) -> torch.Tensor: + if latent.dim() == 5: + return latent + if latent.dim() == 4: + return latent.unsqueeze(0) + raise ValueError(f"Expected condition_frame_latent to be 4D or 5D, got shape {tuple(latent.shape)}") + def prepare(self, seed, condition_frame_latent, action_shape, dtype, device): + condition_frame_latent = self._ensure_video_latent_5d(condition_frame_latent) batch, channels, _, latent_h, latent_w = condition_frame_latent.shape total_latent_frames = 1 + self.config["num_video_frames"] // 4 generator = None if seed is None else torch.Generator(device=device).manual_seed(seed) @@ -33,6 +41,7 @@ def iter_steps(self): yield step_index, t, t_next, t_next - t def step(self, video_velocity, action_velocity, dt, condition_frame_latent): + condition_frame_latent = self._ensure_video_latent_5d(condition_frame_latent) self.video_latents = self.video_latents + video_velocity * dt self.action_latents = self.action_latents + action_velocity * dt self.video_latents[:, :, 0:1] = condition_frame_latent
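
Note for reviewers: below is a minimal, self-contained sketch of the two numerical patterns these patches centralize — the AdaLN scale/shift modulation that core.py and transformer_infer.py now route through fuse_scale_shift_kernel (only the eager fallback is shown here; the fused Triton kernel is dispatched on CUDA in the real code), and the Euler flow-matching update guarded in MotusScheduler.step. The helper names and toy tensor shapes are assumptions for illustration, not the shipped implementation.

import torch

def apply_modulation(x: torch.Tensor, scale: torch.Tensor, shift: torch.Tensor) -> torch.Tensor:
    # Eager fallback of the fused kernel: y = x * (1 + scale) + shift, in fp32.
    return x.float() * (1 + scale) + shift

def euler_step(latents, velocity, dt, condition_frame_latent):
    # Flow-matching Euler update: integrate the predicted velocity,
    # then re-pin the conditioning frame so the step never drifts it.
    latents = latents + velocity * dt
    latents[:, :, 0:1] = condition_frame_latent
    return latents

# Toy shapes (assumed): [batch, seq, dim] tokens with per-sample
# [batch, 1, dim] modulation parameters broadcast over the sequence.
x = torch.randn(2, 16, 64)
y = apply_modulation(x, torch.randn(2, 1, 64), torch.randn(2, 1, 64))
assert y.shape == x.shape

# Toy latents (assumed): [batch, channels, frames, height, width].
lat = torch.randn(1, 48, 5, 8, 8)
cond = torch.randn(1, 48, 1, 8, 8)
lat = euler_step(lat, torch.randn_like(lat), 0.1, cond)
assert torch.equal(lat[:, :, 0:1], cond)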