ModelTC
diff --git a/configs/disagg/qwen/qwen_image_i2i_disagg_decode.json b/configs/disagg/qwen/qwen_image_i2i_disagg_decode.json
Lines changed: 20 additions & 0 deletions
diff --git a/configs/disagg/qwen/qwen_image_i2i_disagg_encoder.json b/configs/disagg/qwen/qwen_image_i2i_disagg_encoder.json
Lines changed: 31 additions & 0 deletions
diff --git a/configs/disagg/qwen/qwen_image_i2i_disagg_transformer.json b/configs/disagg/qwen/qwen_image_i2i_disagg_transformer.json
Lines changed: 32 additions & 0 deletions
diff --git a/configs/disagg/qwen/qwen_image_t2i_disagg_controller.json b/configs/disagg/qwen/qwen_image_t2i_disagg_controller.json
Lines changed: 29 additions & 0 deletions
diff --git a/configs/disagg/qwen/qwen_image_t2i_disagg_decode.json b/configs/disagg/qwen/qwen_image_t2i_disagg_decode.json
Lines changed: 21 additions & 0 deletions
diff --git a/configs/disagg/qwen/qwen_image_t2i_disagg_decode_decentralized.json b/configs/disagg/qwen/qwen_image_t2i_disagg_decode_decentralized.json
Lines changed: 28 additions & 0 deletions
diff --git a/configs/disagg/qwen/qwen_image_t2i_disagg_encoder.json b/configs/disagg/qwen/qwen_image_t2i_disagg_encoder.json
Lines changed: 30 additions & 0 deletions
diff --git a/configs/disagg/qwen/qwen_image_t2i_disagg_encoder_decentralized.json b/configs/disagg/qwen/qwen_image_t2i_disagg_encoder_decentralized.json
Lines changed: 33 additions & 0 deletions
diff --git a/configs/disagg/qwen/qwen_image_t2i_disagg_transformer.json b/configs/disagg/qwen/qwen_image_t2i_disagg_transformer.json
Lines changed: 32 additions & 0 deletions
diff --git a/configs/disagg/qwen/qwen_image_t2i_disagg_transformer_decentralized.json b/configs/disagg/qwen/qwen_image_t2i_disagg_transformer_decentralized.json
Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,20 @@
+{
+    "task": "i2i",
+    "disagg_mode": "decode",
+    "infer_steps": 40,
+    "vae_scale_factor": 8,
+    "vae_z_dim": 16,
+    "vae_stride": [1, 8, 8],
+    "target_video_length": 1,
+    "target_height": 1664,
+    "target_width": 1664,
+    "disagg_config": {
+        "bootstrap_addr": "127.0.0.1",
+        "bootstrap_room": 2,
+        "sender_engine_rank": 1,
+        "receiver_engine_rank": 2,
+        "protocol": "rdma",
+        "local_hostname": "localhost",
+        "metadata_server": "P2PHANDSHAKE"
+    }
+}
@@ -0,0 +1,31 @@
+{
+    "task": "i2i",
+    "disagg_mode": "encoder",
+    "text_encoder_type": "lightllm_kernel",
+    "text_encoder_dim": 3584,
+    "text_len": 4096,
+    "infer_steps": 40,
+    "prompt_template_encode": "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
+    "prompt_template_encode_start_idx": 64,
+    "resize_mode": "adaptive",
+    "attn_type": "flash_attn3",
+    "enable_cfg": true,
+    "sample_guide_scale": 4.0,
+    "CONDITION_IMAGE_SIZE": 147456,
+    "USE_IMAGE_ID_IN_PROMPT": true,
+    "vae_z_dim": 16,
+    "vae_stride": [1, 8, 8],
+    "target_video_length": 1,
+    "target_height": 1664,
+    "target_width": 1664,
+    "disagg_config": {
+        "bootstrap_addr": "127.0.0.1",
+        "bootstrap_room": 1,
+        "sender_engine_rank": 0,
+        "receiver_engine_rank": 1,
+        "protocol": "rdma",
+        "local_hostname": "localhost",
+        "metadata_server": "P2PHANDSHAKE",
+        "device_name": ""
+    }
+}
@@ -0,0 +1,32 @@
+{
+    "task": "i2i",
+    "disagg_mode": "transformer",
+    "text_encoder_dim": 3584,
+    "text_len": 4096,
+    "infer_steps": 40,
+    "prompt_template_encode": "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
+    "prompt_template_encode_start_idx": 64,
+    "resize_mode": "adaptive",
+    "attn_type": "flash_attn3",
+    "enable_cfg": true,
+    "sample_guide_scale": 4.0,
+    "CONDITION_IMAGE_SIZE": 147456,
+    "USE_IMAGE_ID_IN_PROMPT": true,
+    "vae_z_dim": 16,
+    "vae_stride": [1, 8, 8],
+    "target_video_length": 1,
+    "target_height": 1664,
+    "target_width": 1664,
+    "disagg_config": {
+        "bootstrap_addr": "127.0.0.1",
+        "bootstrap_room": 1,
+        "sender_engine_rank": 0,
+        "receiver_engine_rank": 1,
+        "protocol": "rdma",
+        "local_hostname": "localhost",
+        "metadata_server": "P2PHANDSHAKE",
+        "device_name": "",
+        "decoder_engine_rank": 2,
+        "decoder_bootstrap_room": 2
+    }
+}
@@ -0,0 +1,29 @@
+{
+  "task": "t2i",
+  "model_cls": "qwen_image",
+  "text_len": 4096,
+  "text_encoder_dim": 3584,
+  "infer_steps": 50,
+  "aspect_ratio": "16:9",
+  "max_custom_size": 3072,
+  "vae_z_dim": 16,
+  "vae_stride": [1, 8, 8],
+  "sample_guide_scale": 4.0,
+  "enable_cfg": true,
+  "disagg_mode": "controller",
+  "disagg_config": {
+    "bootstrap_addr": "127.0.0.1",
+    "bootstrap_room": 0,
+    "encoder_engine_rank": 0,
+    "transformer_engine_rank": 1,
+    "decoder_engine_rank": 4,
+    "protocol": "rdma",
+    "local_hostname": "localhost",
+    "metadata_server": "P2PHANDSHAKE",
+    "rdma_buffer_slots": 128,
+    "rdma_buffer_slot_size": 4096,
+    "rdma_request_handshake_port": 5566,
+    "rdma_phase1_handshake_port": 5567,
+    "rdma_phase2_handshake_port": 5568
+  }
+}
@@ -0,0 +1,21 @@
+{
+    "task": "t2i",
+    "disagg_mode": "decode",
+    "infer_steps": 50,
+    "vae_scale_factor": 8,
+    "vae_z_dim": 16,
+    "vae_stride": [1, 8, 8],
+    "target_video_length": 1,
+    "target_height": 1536,
+    "target_width": 2752,
+    "max_custom_size": 3072,
+    "disagg_config": {
+        "bootstrap_addr": "127.0.0.1",
+        "bootstrap_room": 2,
+        "sender_engine_rank": 1,
+        "receiver_engine_rank": 2,
+        "protocol": "rdma",
+        "local_hostname": "localhost",
+        "metadata_server": "P2PHANDSHAKE"
+    }
+}
@@ -0,0 +1,28 @@
+{
+    "task": "t2i",
+    "model_cls": "qwen_image",
+    "disagg_mode": "decode",
+    "infer_steps": 50,
+    "vae_scale_factor": 8,
+    "vae_z_dim": 16,
+    "vae_stride": [1, 8, 8],
+    "disagg_config": {
+        "bootstrap_addr": "127.0.0.1",
+        "bootstrap_room": 0,
+        "sender_engine_rank": 1,
+        "receiver_engine_rank": 4,
+        "protocol": "rdma",
+        "local_hostname": "localhost",
+        "metadata_server": "P2PHANDSHAKE",
+        "decentralized_queue": true,
+        "encoder_engine_rank": 0,
+        "transformer_engine_rank": 1,
+        "decoder_engine_rank": 4,
+        "rdma_phase1_host": "127.0.0.1",
+        "rdma_phase1_handshake_port": 5567,
+        "rdma_phase2_host": "127.0.0.1",
+        "rdma_phase2_handshake_port": 5568,
+        "rdma_buffer_slots": 128,
+        "rdma_buffer_slot_size": 4096
+    }
+}
@@ -0,0 +1,30 @@
+{
+    "task": "t2i",
+    "disagg_mode": "encoder",
+    "text_encoder_type": "lightllm_kernel",
+    "text_encoder_dim": 3584,
+    "text_len": 4096,
+    "infer_steps": 50,
+    "aspect_ratio": "16:9",
+    "max_custom_size": 3072,
+    "vae_z_dim": 16,
+    "vae_stride": [1, 8, 8],
+    "target_video_length": 1,
+    "target_height": 1536,
+    "target_width": 2752,
+    "prompt_template_encode": "<|im_start|>system\nDescribe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
+    "prompt_template_encode_start_idx": 34,
+    "attn_type": "flash_attn3",
+    "enable_cfg": true,
+    "sample_guide_scale": 4.0,
+    "disagg_config": {
+        "bootstrap_addr": "127.0.0.1",
+        "bootstrap_room": 0,
+        "sender_engine_rank": 0,
+        "receiver_engine_rank": 1,
+        "protocol": "rdma",
+        "local_hostname": "localhost",
+        "metadata_server": "P2PHANDSHAKE",
+        "device_name": ""
+    }
+}
@@ -0,0 +1,33 @@
+{
+    "task": "t2i",
+    "model_cls": "qwen_image",
+    "disagg_mode": "encoder",
+    "text_encoder_type": "lightllm_kernel",
+    "text_encoder_dim": 3584,
+    "text_len": 4096,
+    "infer_steps": 50,
+    "aspect_ratio": "16:9",
+    "max_custom_size": 3072,
+    "vae_z_dim": 16,
+    "vae_stride": [1, 8, 8],
+    "prompt_template_encode": "<|im_start|>system\nDescribe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
+    "prompt_template_encode_start_idx": 34,
+    "attn_type": "flash_attn3",
+    "enable_cfg": true,
+    "sample_guide_scale": 4.0,
+    "disagg_config": {
+        "bootstrap_addr": "127.0.0.1",
+        "bootstrap_room": 0,
+        "sender_engine_rank": 0,
+        "receiver_engine_rank": 1,
+        "protocol": "rdma",
+        "local_hostname": "localhost",
+        "metadata_server": "P2PHANDSHAKE",
+        "device_name": "",
+        "decentralized_queue": true,
+        "rdma_phase1_host": "127.0.0.1",
+        "rdma_phase1_handshake_port": 5567,
+        "rdma_buffer_slots": 128,
+        "rdma_buffer_slot_size": 4096
+    }
+}
@@ -0,0 +1,32 @@
+{
+    "task": "t2i",
+    "disagg_mode": "transformer",
+    "text_encoder_dim": 3584,
+    "text_len": 4096,
+    "infer_steps": 50,
+    "aspect_ratio": "16:9",
+    "max_custom_size": 3072,
+    "prompt_template_encode": "<|im_start|>system\nDescribe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
+    "prompt_template_encode_start_idx": 34,
+    "attn_type": "flash_attn2",
+    "enable_cfg": true,
+    "sample_guide_scale": 4.0,
+    "vae_z_dim": 16,
+    "vae_stride": [1, 8, 8],
+    "target_video_length": 1,
+    "target_height": 1536,
+    "target_width": 2752,
+    "dit_original_ckpt": "/home/fuhaiwen/models/qwen-2512/base_dit_info_v060_res2k_9k_3k_25kiter.safetensors",
+    "disagg_config": {
+        "bootstrap_addr": "127.0.0.1",
+        "bootstrap_room": 0,
+        "sender_engine_rank": 0,
+        "receiver_engine_rank": 1,
+        "protocol": "rdma",
+        "local_hostname": "localhost",
+        "metadata_server": "P2PHANDSHAKE",
+        "device_name": "",
+        "decoder_engine_rank": 2,
+        "decoder_bootstrap_room": 2
+    }
+}
@@ -0,0 +1,41 @@
+{
+    "task": "t2i",
+    "model_cls": "qwen_image",
+    "disagg_mode": "transformer",
+    "text_encoder_dim": 3584,
+    "text_len": 4096,
+    "infer_steps": 50,
+    "aspect_ratio": "16:9",
+    "max_custom_size": 3072,
+    "prompt_template_encode": "<|im_start|>system\nDescribe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
+    "prompt_template_encode_start_idx": 34,
+    "attn_type": "flash_attn2",
+    "enable_cfg": true,
+    "sample_guide_scale": 4.0,
+    "vae_z_dim": 16,
+    "vae_stride": [1, 8, 8],
+    "target_video_length": 1,
+    "target_height": 1664,
+    "target_width": 1664,
+    "disagg_config": {
+        "bootstrap_addr": "127.0.0.1",
+        "bootstrap_room": 0,
+        "sender_engine_rank": 0,
+        "receiver_engine_rank": 1,
+        "decoder_engine_rank": 4,
+        "decoder_bootstrap_room": 0,
+        "protocol": "rdma",
+        "local_hostname": "localhost",
+        "metadata_server": "P2PHANDSHAKE",
+        "device_name": "",
+        "decentralized_queue": true,
+        "encoder_engine_rank": 0,
+        "transformer_engine_rank": 1,
+        "rdma_phase1_host": "127.0.0.1",
+        "rdma_phase1_handshake_port": 5567,
+        "rdma_phase2_host": "127.0.0.1",
+        "rdma_phase2_handshake_port": 5568,
+        "rdma_buffer_slots": 128,
+        "rdma_buffer_slot_size": 4096
+    }
+}