SemiAnalysisAI
diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
Lines changed: 3 additions & 3 deletions
diff --git a/.github/workflows/benchmark-multinode-tmpl.yml b/.github/workflows/benchmark-multinode-tmpl.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/benchmark-tmpl.yml b/.github/workflows/benchmark-tmpl.yml
Lines changed: 1 addition & 1 deletion
diff --git a/benchmarks/single_node/dsr1_fp4_b200.sh b/benchmarks/single_node/fixed_seq_len/dsr1_fp4_b200.sh
benchmarks/single_node/dsr1_fp4_b200.sh renamed to benchmarks/single_node/fixed_seq_len/dsr1_fp4_b200.sh
Lines changed: 1 addition & 1 deletion
diff --git a/benchmarks/single_node/dsr1_fp4_b200_trt.sh b/benchmarks/single_node/fixed_seq_len/dsr1_fp4_b200_trt.sh
benchmarks/single_node/dsr1_fp4_b200_trt.sh renamed to benchmarks/single_node/fixed_seq_len/dsr1_fp4_b200_trt.sh
Lines changed: 1 addition & 1 deletion
diff --git a/benchmarks/single_node/dsr1_fp4_b200_trt_mtp.sh b/benchmarks/single_node/fixed_seq_len/dsr1_fp4_b200_trt_mtp.sh
benchmarks/single_node/dsr1_fp4_b200_trt_mtp.sh renamed to benchmarks/single_node/fixed_seq_len/dsr1_fp4_b200_trt_mtp.sh
Lines changed: 1 addition & 1 deletion
diff --git a/benchmarks/single_node/dsr1_fp4_b300.sh b/benchmarks/single_node/fixed_seq_len/dsr1_fp4_b300.sh
benchmarks/single_node/dsr1_fp4_b300.sh renamed to benchmarks/single_node/fixed_seq_len/dsr1_fp4_b300.sh
Lines changed: 1 addition & 1 deletion
diff --git a/benchmarks/single_node/dsr1_fp4_mi355x.sh b/benchmarks/single_node/fixed_seq_len/dsr1_fp4_mi355x.sh
benchmarks/single_node/dsr1_fp4_mi355x.sh renamed to benchmarks/single_node/fixed_seq_len/dsr1_fp4_mi355x.sh
Lines changed: 1 addition & 1 deletion
diff --git a/benchmarks/single_node/dsr1_fp4_mi355x_atom.sh b/benchmarks/single_node/fixed_seq_len/dsr1_fp4_mi355x_atom.sh
benchmarks/single_node/dsr1_fp4_mi355x_atom.sh renamed to benchmarks/single_node/fixed_seq_len/dsr1_fp4_mi355x_atom.sh
Lines changed: 1 addition & 1 deletion
@@ -1796,7 +1796,7 @@ dsv4-fp4-mi355x-sglang-agentic:
 # vLLM with AITER MLA decode for DSv4 on MI355X (vllm-project/vllm#40889,
 # stacked on #40871). Uses the ATOM MI355X image (ROCm 7.2.2, aiter with
 # MLA decode, MI355X GPU detection); vLLM is rebuilt from the PR branch
-# at runtime by benchmarks/single_node/dsv4_fp8_mi355x_vllm.sh at a
+# at runtime by benchmarks/single_node/fixed_seq_len/dsv4_fp8_mi355x_vllm.sh at a
 # pinned SHA. Once both PRs merge into a release, switch to a vLLM ROCm
 # MI355X image and remove the build step.
 dsv4-fp8-mi355x-vllm:
 
@@ -1704,7 +1704,7 @@ dsv4-fp4-b200-sglang:
   framework: sglang
   multinode: false
   # Two recipes from https://docs.sglang.io/cookbook/autoregressive/DeepSeek/DeepSeek-V4
-  # are selected inside benchmarks/single_node/dsv4_fp4_b200.sh by DP_ATTENTION:
+  # are selected inside benchmarks/single_node/fixed_seq_len/dsv4_fp4_b200.sh by DP_ATTENTION:
   #   low-latency  (DP_ATTENTION=false): TP-only, flashinfer_mxfp4
   #   DP-attention  (DP_ATTENTION=true):  DP-attn + DeepEP + mega_moe opts
   # The DP-attention recipe covers both "balanced" (conc 64-128) and
@@ -1998,7 +1998,7 @@ dsv4-fp4-b300-sglang:
   framework: sglang
   multinode: false
   # Three recipes from https://docs.sglang.io/cookbook/autoregressive/DeepSeek/DeepSeek-V4
-  # are selected inside benchmarks/single_node/dsv4_fp4_b300_sglang.sh by CONC:
+  # are selected inside benchmarks/single_node/fixed_seq_len/dsv4_fp4_b300_sglang.sh by CONC:
   #   low-latency    (CONC <= 32):       TP-only
   #   balanced       (32 < CONC <= 128): + DP-attn
   #   max-throughput (CONC > 128):       + DP-attn
@@ -2024,7 +2024,7 @@ dsv4-fp4-b300-sglang:
       - { tp: 8, ep: 8, dp-attn: true, conc-start: 4096, conc-end: 4096 }
 
   # DeepSeek-V4-Pro on B300 with EAGLE/MTP speculative decoding. Recipe is
-  # selected inside benchmarks/single_node/dsv4_fp4_b300_sglang_mtp.sh by
+  # selected inside benchmarks/single_node/fixed_seq_len/dsv4_fp4_b300_sglang_mtp.sh by
   # DP_ATTENTION:
   #   dp-attn: false -> TP-only + flashinfer_mxfp4 + chunked-prefill 8192
   #                     + EAGLE (3,1,4) + mem-fraction 0.90
 
@@ -139,7 +139,7 @@ env:
   EVAL_ONLY: ${{ inputs.eval-only }}
   EVAL_CONC: ${{ inputs.eval-conc }}
   SCENARIO_TYPE: ${{ inputs.scenario-type }}
-  SCENARIO_SUBDIR: ${{ inputs.scenario-type == 'agentic-coding' && 'agentic/' || '' }}
+  SCENARIO_SUBDIR: ${{ inputs.scenario-type == 'agentic-coding' && 'agentic/' || 'fixed_seq_len/' }}
   IS_AGENTIC: ${{ inputs.scenario-type == 'agentic-coding' && '1' || '0' }}
   CONC: ${{ inputs.conc }}
   DURATION: ${{ inputs.duration }}
 
@@ -109,7 +109,7 @@ env:
   RUN_EVAL: ${{ inputs.run-eval }}
   EVAL_ONLY: ${{ inputs.eval-only }}
   SCENARIO_TYPE: ${{ inputs.scenario-type }}
-  SCENARIO_SUBDIR: ${{ inputs.scenario-type == 'agentic-coding' && 'agentic/' || '' }}
+  SCENARIO_SUBDIR: ${{ inputs.scenario-type == 'agentic-coding' && 'agentic/' || 'fixed_seq_len/' }}
   IS_AGENTIC: ${{ inputs.scenario-type == 'agentic-coding' && '1' || '0' }}
   OFFLOADING: ${{ inputs.offloading }}
   TOTAL_CPU_DRAM_GB: ${{ inputs.total-cpu-dram-gb }}
 
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-source "$(dirname "$0")/../benchmark_lib.sh"
+source "$(dirname "$0")/../../benchmark_lib.sh"
 
 check_env_vars \
     MODEL \
 
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-source "$(dirname "$0")/../benchmark_lib.sh"
+source "$(dirname "$0")/../../benchmark_lib.sh"
 
 check_env_vars \
     MODEL \
 
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-source "$(dirname "$0")/../benchmark_lib.sh"
+source "$(dirname "$0")/../../benchmark_lib.sh"
 
 check_env_vars \
     MODEL \
 
@@ -4,7 +4,7 @@
 # does not have a B300-specific recipe, so this script reuses the existing
 # DSR1 FP4 B200 SGLang recipe as-is until B300-specific tuning is available.
 
-source "$(dirname "$0")/../benchmark_lib.sh"
+source "$(dirname "$0")/../../benchmark_lib.sh"
 
 check_env_vars \
     MODEL \
 
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-source "$(dirname "$0")/../benchmark_lib.sh"
+source "$(dirname "$0")/../../benchmark_lib.sh"
 
 check_env_vars \
     MODEL \
 
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-source "$(dirname "$0")/../benchmark_lib.sh"
+source "$(dirname "$0")/../../benchmark_lib.sh"
 
 check_env_vars \
     MODEL \