ModelTC
diff --git a/‎configs/cosmos3/cosmos3_super_omni_action_fd_agibotworld.json‎
Lines changed: 28 additions & 0 deletions b/‎configs/cosmos3/cosmos3_super_omni_action_fd_agibotworld.json‎
Lines changed: 28 additions & 0 deletions
diff --git a/‎configs/cosmos3/cosmos3_super_omni_action_fd_agibotworld_multichunk.json‎
Lines changed: 30 additions & 0 deletions b/‎configs/cosmos3/cosmos3_super_omni_action_fd_agibotworld_multichunk.json‎
Lines changed: 30 additions & 0 deletions
diff --git a/‎configs/cosmos3/cosmos3_super_omni_action_id_av.json‎
Lines changed: 27 additions & 0 deletions b/‎configs/cosmos3/cosmos3_super_omni_action_id_av.json‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎configs/cosmos3/cosmos3_super_omni_i2av.json‎
Lines changed: 23 additions & 0 deletions b/‎configs/cosmos3/cosmos3_super_omni_i2av.json‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎configs/cosmos3/cosmos3_super_omni_i2v.json‎
Lines changed: 22 additions & 0 deletions b/‎configs/cosmos3/cosmos3_super_omni_i2v.json‎
Lines changed: 22 additions & 0 deletions
diff --git a/‎configs/cosmos3/cosmos3_super_omni_t2av.json‎
Lines changed: 23 additions & 0 deletions b/‎configs/cosmos3/cosmos3_super_omni_t2av.json‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎configs/cosmos3/cosmos3_super_omni_t2v.json‎
Lines changed: 22 additions & 0 deletions b/‎configs/cosmos3/cosmos3_super_omni_t2v.json‎
Lines changed: 22 additions & 0 deletions
diff --git a/‎configs/cosmos3/cosmos3_super_t2v.json‎
Lines changed: 22 additions & 0 deletions b/‎configs/cosmos3/cosmos3_super_t2v.json‎
Lines changed: 22 additions & 0 deletions
diff --git a/‎lightx2v/infer.py‎
Lines changed: 5 additions & 0 deletions b/‎lightx2v/infer.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎lightx2v/models/audio_encoders/__init__.py‎ b/‎lightx2v/models/audio_encoders/__init__.py‎
@@ -0,0 +1,28 @@
+{
+    "infer_steps": 30,
+    "sample_guide_scale": 1.0,
+    "sample_shift": 10.0,
+    "target_height": 720,
+    "target_width": 640,
+    "target_video_length": 17,
+    "target_fps": 10.0,
+    "enable_cfg": true,
+    "action_mode": "forward_dynamics",
+    "domain_name": "agibotworld",
+    "view_point": "concat_view",
+    "action_chunk_size": 16,
+    "action_chunk_index": 0,
+    "feature_caching": "NoCaching",
+    "rms_norm_type": "one-pass",
+    "attn_rms_norm_type": "one-pass",
+    "rope_type": "triton",
+    "self_attn_type": "flash_attn3",
+    "causal_self_attn_type": "flash_attn3",
+    "add_resolution_template": false,
+    "add_duration_template": false,
+    "use_system_prompt": false,
+    "cosmos3_meta_init": true,
+    "vae_cpu_offload": false,
+    "cpu_offload": false,
+    "offload_granularity": "block"
+}
@@ -0,0 +1,30 @@
+{
+    "infer_steps": 30,
+    "sample_guide_scale": 1.0,
+    "sample_shift": 10.0,
+    "target_height": 720,
+    "target_width": 640,
+    "target_video_length": 17,
+    "target_fps": 10.0,
+    "enable_cfg": true,
+    "action_mode": "forward_dynamics",
+    "domain_name": "agibotworld",
+    "view_point": "concat_view",
+    "action_chunk_size": 16,
+    "action_chunk_index": 0,
+    "action_multichunk": true,
+    "action_num_chunks": 4,
+    "feature_caching": "NoCaching",
+    "rms_norm_type": "one-pass",
+    "attn_rms_norm_type": "one-pass",
+    "rope_type": "triton",
+    "self_attn_type": "flash_attn3",
+    "causal_self_attn_type": "flash_attn3",
+    "add_resolution_template": false,
+    "add_duration_template": false,
+    "use_system_prompt": false,
+    "cosmos3_meta_init": true,
+    "vae_cpu_offload": false,
+    "cpu_offload": false,
+    "offload_granularity": "block"
+}
@@ -0,0 +1,27 @@
+{
+    "infer_steps": 30,
+    "sample_guide_scale": 1.0,
+    "sample_shift": 10.0,
+    "target_height": 480,
+    "target_width": 832,
+    "target_video_length": 61,
+    "target_fps": 10.0,
+    "enable_cfg": true,
+    "action_mode": "inverse_dynamics",
+    "domain_name": "av",
+    "view_point": "ego_view",
+    "action_chunk_size": 60,
+    "feature_caching": "NoCaching",
+    "rms_norm_type": "one-pass",
+    "attn_rms_norm_type": "one-pass",
+    "rope_type": "triton",
+    "self_attn_type": "flash_attn3",
+    "causal_self_attn_type": "flash_attn3",
+    "add_resolution_template": false,
+    "add_duration_template": false,
+    "use_system_prompt": false,
+    "cosmos3_meta_init": true,
+    "vae_cpu_offload": false,
+    "cpu_offload": false,
+    "offload_granularity": "block"
+}
@@ -0,0 +1,23 @@
+{
+    "infer_steps": 35,
+    "sample_guide_scale": 6.0,
+    "sample_shift": 10.0,
+    "target_height": 720,
+    "target_width": 1280,
+    "target_video_length": 189,
+    "target_fps": 24.0,
+    "enable_cfg": true,
+    "enable_sound": true,
+    "feature_caching": "NoCaching",
+    "rms_norm_type": "one-pass",
+    "attn_rms_norm_type": "one-pass",
+    "rope_type": "triton",
+    "self_attn_type": "flash_attn3",
+    "causal_self_attn_type": "flash_attn3",
+    "add_resolution_template": false,
+    "add_duration_template": false,
+    "cosmos3_meta_init": true,
+    "vae_cpu_offload": false,
+    "cpu_offload": false,
+    "offload_granularity": "block"
+}
@@ -0,0 +1,22 @@
+{
+    "infer_steps": 35,
+    "sample_guide_scale": 6.0,
+    "sample_shift": 10.0,
+    "target_height": 720,
+    "target_width": 1280,
+    "target_video_length": 189,
+    "target_fps": 24.0,
+    "enable_cfg": true,
+    "feature_caching": "NoCaching",
+    "rms_norm_type": "one-pass",
+    "attn_rms_norm_type": "one-pass",
+    "rope_type": "triton",
+    "self_attn_type": "flash_attn3",
+    "causal_self_attn_type": "flash_attn3",
+    "add_resolution_template": false,
+    "add_duration_template": false,
+    "cosmos3_meta_init": true,
+    "vae_cpu_offload": false,
+    "cpu_offload": false,
+    "offload_granularity": "block"
+}
@@ -0,0 +1,23 @@
+{
+    "infer_steps": 35,
+    "sample_guide_scale": 6.0,
+    "sample_shift": 10.0,
+    "target_height": 720,
+    "target_width": 1280,
+    "target_video_length": 189,
+    "target_fps": 24.0,
+    "enable_cfg": true,
+    "enable_sound": true,
+    "feature_caching": "NoCaching",
+    "rms_norm_type": "one-pass",
+    "attn_rms_norm_type": "one-pass",
+    "rope_type": "triton",
+    "self_attn_type": "flash_attn3",
+    "causal_self_attn_type": "flash_attn3",
+    "add_resolution_template": false,
+    "add_duration_template": false,
+    "cosmos3_meta_init": true,
+    "vae_cpu_offload": false,
+    "cpu_offload": false,
+    "offload_granularity": "block"
+}
@@ -0,0 +1,22 @@
+{
+    "infer_steps": 35,
+    "sample_guide_scale": 6.0,
+    "sample_shift": 10.0,
+    "target_height": 720,
+    "target_width": 1280,
+    "target_video_length": 189,
+    "target_fps": 24.0,
+    "enable_cfg": true,
+    "feature_caching": "NoCaching",
+    "rms_norm_type": "one-pass",
+    "attn_rms_norm_type": "one-pass",
+    "rope_type": "triton",
+    "self_attn_type": "flash_attn3",
+    "causal_self_attn_type": "flash_attn3",
+    "add_resolution_template": false,
+    "add_duration_template": false,
+    "cosmos3_meta_init": true,
+    "vae_cpu_offload": false,
+    "cpu_offload": false,
+    "offload_granularity": "block"
+}
@@ -0,0 +1,22 @@
+{
+    "infer_steps": 35,
+    "sample_guide_scale": 6.0,
+    "sample_shift": 10.0,
+    "target_height": 720,
+    "target_width": 1280,
+    "target_video_length": 189,
+    "target_fps": 24.0,
+    "enable_cfg": true,
+    "feature_caching": "NoCaching",
+    "rms_norm_type": "one-pass",
+    "attn_rms_norm_type": "one-pass",
+    "rope_type": "triton",
+    "self_attn_type": "flash_attn3",
+    "causal_self_attn_type": "flash_attn3",
+    "add_resolution_template": false,
+    "add_duration_template": false,
+    "cosmos3_meta_init": true,
+    "vae_cpu_offload": false,
+    "cpu_offload": false,
+    "offload_granularity": "block"
+}
@@ -199,6 +199,11 @@ def main():
         default=None,
         help="Directory path for lingbot camera/action control files (poses.npy, intrinsics.npy, optional action.npy).",
     )
+    parser.add_argument("--action_mode", type=str, default=None, choices=["forward_dynamics", "inverse_dynamics", "policy"], help="Cosmos3 action mode.")
+    parser.add_argument("--domain_name", type=str, default=None, help="Cosmos3 action embodiment domain name.")
+    parser.add_argument("--view_point", type=str, default=None, help="Cosmos3 action viewpoint label.")
+    parser.add_argument("--action_chunk_size", type=int, default=None, help="Cosmos3 action chunk size.")
+    parser.add_argument("--action_chunk_index", type=int, default=None, help="Cosmos3 action chunk index when action_path contains action_chunks.")
     parser.add_argument(
         "--action_ckpt",
         type=str,