Skip to content

Commit 89eecaf

Browse files
committed
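Optimizing GEMM/collective microbenchmark configs: switch to benchmark_sweep_params, add larger shapes and new sharded benchmark variants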
1 parent 1907768 commit 89eecaf

5 files changed

Lines changed: 663 additions & 49 deletions

File tree

Ironwood/configs/training/gemm_all_reduce.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,3 +11,4 @@ benchmarks:
1111
- {m: 4096, k: 4096, n: 4096, dtype: "bfloat16"}
1212
- {m: 8192, k: 8192, n: 8192, dtype: "bfloat16"}
1313
- {m: 16384, k: 16384, n: 16384, dtype: "bfloat16"}
14+
- {m: 32768, k: 32768, n: 32768, dtype: "bfloat16"}
Lines changed: 57 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1,34 +1,73 @@
11
benchmarks:
2-
- benchmark_name: gemm_only
2+
# - benchmark_name: gemm_only
3+
# trace_dir: "../microbenchmarks/gemm_all_reduce"
4+
# csv_path: "../microbenchmarks/gemm_all_reduce"
5+
# xlml_metrics_dir: "../microbenchmarks/gemm_all_reduce"
6+
# xla_dump_dir: "../microbenchmarks/gemm_all_reduce/hlo_graphs"
7+
# num_runs: 10
8+
# benchmark_sweep_params:
9+
# - {m: 4096, k: 4096, n: 4096, dtype: "bfloat16"}
10+
# - {m: 8192, k: 8192, n: 8192, dtype: "bfloat16"}
11+
# - {m: 16384, k: 16384, n: 16384, dtype: "bfloat16"}
12+
# - {m: 32768, k: 32768, n: 32768, dtype: "bfloat16"}
13+
# - benchmark_name: all_reduce_only
14+
# trace_dir: "../microbenchmarks/gemm_all_reduce"
15+
# csv_path: "../microbenchmarks/gemm_all_reduce"
16+
# xlml_metrics_dir: "../microbenchmarks/gemm_all_reduce"
17+
# xla_dump_dir: "../microbenchmarks/gemm_all_reduce/hlo_graphs"
18+
# num_runs: 10
19+
# benchmark_sweep_params:
20+
# - {m: 4096, k: 4096, n: 4096, dtype: "bfloat16"}
21+
# - {m: 8192, k: 8192, n: 8192, dtype: "bfloat16"}
22+
# - {m: 16384, k: 16384, n: 16384, dtype: "bfloat16"}
23+
# - {m: 32768, k: 32768, n: 32768, dtype: "bfloat16"}
24+
- benchmark_name: gemm_all_reduce
325
trace_dir: "../microbenchmarks/gemm_all_reduce"
426
csv_path: "../microbenchmarks/gemm_all_reduce"
527
xlml_metrics_dir: "../microbenchmarks/gemm_all_reduce"
628
xla_dump_dir: "../microbenchmarks/gemm_all_reduce/hlo_graphs"
729
num_runs: 10
8-
benchmark_params:
9-
- m: 4096
10-
k: 4096
11-
n: 4096
12-
dtype: "bfloat16"
13-
- benchmark_name: all_reduce_only
30+
benchmark_sweep_params:
31+
- {m: 4096, k: 4096, n: 4096, dtype: "bfloat16"}
32+
- {m: 8192, k: 8192, n: 8192, dtype: "bfloat16"}
33+
- {m: 16384, k: 16384, n: 16384, dtype: "bfloat16"}
34+
- {m: 32768, k: 32768, n: 32768, dtype: "bfloat16"}
35+
- benchmark_name: gemm_reducescatter_allgather
1436
trace_dir: "../microbenchmarks/gemm_all_reduce"
1537
csv_path: "../microbenchmarks/gemm_all_reduce"
1638
xlml_metrics_dir: "../microbenchmarks/gemm_all_reduce"
1739
xla_dump_dir: "../microbenchmarks/gemm_all_reduce/hlo_graphs"
1840
num_runs: 10
19-
benchmark_params:
20-
- m: 4096
21-
k: 4096 # Passed to maintain signature, though not used for shape of C
22-
n: 4096
23-
dtype: "bfloat16"
24-
- benchmark_name: gemm_all_reduce
41+
benchmark_sweep_params:
42+
- {m: 4096, k: 4096, n: 4096, dtype: "bfloat16"}
43+
- {m: 8192, k: 8192, n: 8192, dtype: "bfloat16"}
44+
- {m: 16384, k: 16384, n: 16384, dtype: "bfloat16"}
45+
- {m: 32768, k: 32768, n: 32768, dtype: "bfloat16"}
46+
- benchmark_name: gemm_sharded_all_gather
47+
trace_dir: "../microbenchmarks/gemm_all_reduce"
48+
csv_path: "../microbenchmarks/gemm_all_reduce"
49+
xlml_metrics_dir: "../microbenchmarks/gemm_all_reduce"
50+
xla_dump_dir: "../microbenchmarks/gemm_all_reduce/hlo_graphs"
51+
num_runs: 10
52+
benchmark_sweep_params:
53+
- {m: 4096, k: 4096, n: 4096, dtype: "bfloat16"}
54+
- {m: 8192, k: 8192, n: 8192, dtype: "bfloat16"}
55+
- {m: 16384, k: 16384, n: 16384, dtype: "bfloat16"}
56+
- {m: 32768, k: 32768, n: 32768, dtype: "bfloat16"}
57+
- {m: 65536, k: 65536, n: 65536, dtype: "bfloat16"}
58+
- {m: 131072, k: 131072, n: 131072, dtype: "bfloat16"}
59+
- {m: 262144, k: 262144, n: 262144, dtype: "bfloat16"}
60+
- {m: 524288, k: 524288, n: 524288, dtype: "bfloat16"}
61+
- benchmark_name: gemm_k_sharded_all_reduce
2562
trace_dir: "../microbenchmarks/gemm_all_reduce"
2663
csv_path: "../microbenchmarks/gemm_all_reduce"
2764
xlml_metrics_dir: "../microbenchmarks/gemm_all_reduce"
2865
xla_dump_dir: "../microbenchmarks/gemm_all_reduce/hlo_graphs"
2966
num_runs: 10
30-
benchmark_params:
31-
- m: 4096
32-
k: 4096
33-
n: 4096
34-
dtype: "bfloat16"
67+
benchmark_sweep_params:
68+
- {m: 4096, k: 4096, n: 4096, dtype: "bfloat16"}
69+
- {m: 8192, k: 8192, n: 8192, dtype: "bfloat16"}
70+
- {m: 16384, k: 16384, n: 16384, dtype: "bfloat16"}
71+
- {m: 32768, k: 32768, n: 32768, dtype: "bfloat16"}
72+
- {m: 65536, k: 65536, n: 65536, dtype: "bfloat16"}
73+
- {m: 131072, k: 131072, n: 131072, dtype: "bfloat16"}

0 commit comments

Comments (0)