[AMD] Re-apply: Update search-space for ATOM DSR1 configs (#792)

cquil11 · web-flow · commit 8944a358cce8 · 2026-02-25T13:05:40.000-06:00
* Revert "Revert "[AMD] Update search-space for ATOM DSR1 configs (#699)" (#791)" This reverts commit 96e54ca. * Update amd-master.yaml
diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml
@@ -33,17 +33,17 @@ dsr1-fp4-mi355x-atom:
   - isl: 1024
     osl: 1024
     search-space:
-    - { tp: 4, ep: 1, conc-start: 32, conc-end: 128 }
+    - { tp: 4, ep: 1, conc-start: 32, conc-end: 256 }
     - { tp: 8, ep: 1, conc-start: 4, conc-end: 32 }
   - isl: 1024
     osl: 8192
     search-space:
-    - { tp: 4, ep: 1, conc-start: 128, conc-end: 128 }
+    - { tp: 4, ep: 1, conc-start: 128, conc-end: 256 }
     - { tp: 8, ep: 1, conc-start: 4, conc-end: 128 }
   - isl: 8192
     osl: 1024
     search-space:
-    - { tp: 4, ep: 1, conc-start: 4, conc-end: 128 }
+    - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 }
     - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 }
 
 dsr1-fp4-mi355x-atom-mtp:
@@ -64,11 +64,13 @@ dsr1-fp4-mi355x-atom-mtp:
   - isl: 1024
     osl: 8192
     search-space:
-    - { tp: 8, conc-start: 256, conc-end: 256, spec-decoding: mtp  }
+    # - { tp: 4, conc-start: 4, conc-end: 256, spec-decoding: mtp }
+    - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp }
   - isl: 8192
     osl: 1024
     search-space:
-    - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp  }
+    # - { tp: 4, conc-start: 32, conc-end: 256, spec-decoding: mtp }
+    - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp }
 
 dsr1-fp8-mi300x-sglang:
   image: lmsysorg/sglang:v0.5.8-rocm700-mi30x
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
@@ -723,3 +723,14 @@
     - "Gains: CUTLASS MoE optimizations (~8% throughput), FP4 kernel improvements (~4% E2E on B200), torch.compile cold-start fix"
     - "v0.15.1 includes fix for prefix cache hit rate of 0% on GPT-OSS hybrid attention models"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/789
+
+- config-keys:
+    - dsr1-fp4-mi355x-atom
+    - dsr1-fp4-mi355x-atom-mtp
+  description:
+    - "Update search-space configurations for DSR1 FP4 MI355X ATOM and ATOM-MTP"
+    - "ATOM-MTP: comment out TP=4 configs, leaving TP=8 only (1k8k, 8k1k)"
+    - "ATOM: keep TP=4 and extend its concurrency range to conc-end: 256 across all sequence lengths (1k1k, 1k8k, 8k1k)"
+    - "Fix MTP 1k8k conc-start from 256 to 4 to enable full concurrency sweep"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/699
+