Skip to content

Commit a06270a

Browse files
committed
Update collectives configurations for different slice sizes.
. .
1 parent 1e6c308 commit a06270a

6 files changed

Lines changed: 126 additions & 0 deletions

File tree

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
benchmarks:
2+
- benchmark_name: all_gather
3+
benchmark_sweep_params:
4+
- {matrix_dim_range: {start: 4, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5+
- {matrix_dim_range: {start: 4, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
6+
trace_dir: "../microbenchmarks/all_gather_tpu7x_2x2x1"
7+
csv_path: "../microbenchmarks/all_gather_tpu7x_2x2x1"
8+
xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_2x2x1"
9+
xla_dump_dir: "../microbenchmarks/all_gather_tpu7x_2x2x1/hlo_graphs"
10+
- benchmark_name: psum
11+
benchmark_sweep_params:
12+
- {matrix_dim_range: {start: 4, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica
13+
- {matrix_dim_range: {start: 4, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
14+
trace_dir: "../microbenchmarks/psum_tpu7x_2x2x1"
15+
csv_path: "../microbenchmarks/psum_tpu7x_2x2x1"
16+
xlml_metrics_dir: "../microbenchmarks/psum_tpu7x_2x2x1"
17+
xla_dump_dir: "../microbenchmarks/psum_tpu7x_2x2x1/hlo_graphs"
18+
- benchmark_name: all_to_all
19+
benchmark_sweep_params:
20+
- {matrix_dim_range: {start: 4, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica
21+
- {matrix_dim_range: {start: 4, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
22+
trace_dir: "../microbenchmarks/all_to_all_tpu7x_2x2x1"
23+
csv_path: "../microbenchmarks/all_to_all_tpu7x_2x2x1"
24+
xlml_metrics_dir: "../microbenchmarks/all_to_all_tpu7x_2x2x1"
25+
xla_dump_dir: "../microbenchmarks/all_to_all_tpu7x_2x2x1/hlo_graphs"
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
benchmarks:
2+
- benchmark_name: all_gather
3+
benchmark_sweep_params:
4+
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5+
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
6+
trace_dir: "../microbenchmarks/all_gather_tpu7x_2x2x2"
7+
csv_path: "../microbenchmarks/all_gather_tpu7x_2x2x2"
8+
xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_2x2x2"
9+
xla_dump_dir: "../microbenchmarks/all_gather_tpu7x_2x2x2/hlo_graphs"
10+
- benchmark_name: psum
11+
benchmark_sweep_params:
12+
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica
13+
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
14+
trace_dir: "../microbenchmarks/psum_tpu7x_2x2x2"
15+
csv_path: "../microbenchmarks/psum_tpu7x_2x2x2"
16+
xlml_metrics_dir: "../microbenchmarks/psum_tpu7x_2x2x2"
17+
xla_dump_dir: "../microbenchmarks/psum_tpu7x_2x2x2/hlo_graphs"
18+
- benchmark_name: all_to_all
19+
benchmark_sweep_params:
20+
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica
21+
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
22+
trace_dir: "../microbenchmarks/all_to_all_tpu7x_2x2x2"
23+
csv_path: "../microbenchmarks/all_to_all_tpu7x_2x2x2"
24+
xlml_metrics_dir: "../microbenchmarks/all_to_all_tpu7x_2x2x2"
25+
xla_dump_dir: "../microbenchmarks/all_to_all_tpu7x_2x2x2/hlo_graphs"
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
benchmarks:
2+
- benchmark_name: all_gather
3+
benchmark_sweep_params:
4+
- {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5+
- {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
6+
trace_dir: "../microbenchmarks/all_gather_tpu7x_2x2x4"
7+
csv_path: "../microbenchmarks/all_gather_tpu7x_2x2x4"
8+
xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_2x2x4"
9+
xla_dump_dir: "../microbenchmarks/all_gather_tpu7x_2x2x4/hlo_graphs"
10+
- benchmark_name: psum
11+
benchmark_sweep_params:
12+
- {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
13+
- {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
14+
trace_dir: "../microbenchmarks/psum_tpu7x_2x2x4"
15+
csv_path: "../microbenchmarks/psum_tpu7x_2x2x4"
16+
xlml_metrics_dir: "../microbenchmarks/psum_tpu7x_2x2x4"
17+
xla_dump_dir: "../microbenchmarks/psum_tpu7x_2x2x4/hlo_graphs"
18+
- benchmark_name: all_to_all
19+
benchmark_sweep_params:
20+
- {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
21+
- {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
22+
trace_dir: "../microbenchmarks/all_to_all_tpu7x_2x2x4"
23+
csv_path: "../microbenchmarks/all_to_all_tpu7x_2x2x4"
24+
xlml_metrics_dir: "../microbenchmarks/all_to_all_tpu7x_2x2x4"
25+
xla_dump_dir: "../microbenchmarks/all_to_all_tpu7x_2x2x4/hlo_graphs"
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
benchmarks:
2+
- benchmark_name: all_gather
3+
benchmark_sweep_params:
4+
- {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5+
- {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
6+
trace_dir: "../microbenchmarks/all_gather_tpu7x_2x4x4"
7+
csv_path: "../microbenchmarks/all_gather_tpu7x_2x4x4"
8+
xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_2x4x4"
9+
xla_dump_dir: "../microbenchmarks/all_gather_tpu7x_2x4x4/hlo_graphs"
10+
- benchmark_name: psum
11+
benchmark_sweep_params:
12+
- {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
13+
- {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
14+
trace_dir: "../microbenchmarks/psum_tpu7x_2x4x4"
15+
csv_path: "../microbenchmarks/psum_tpu7x_2x4x4"
16+
xlml_metrics_dir: "../microbenchmarks/psum_tpu7x_2x4x4"
17+
xla_dump_dir: "../microbenchmarks/psum_tpu7x_2x4x4/hlo_graphs"
18+
- benchmark_name: all_to_all
19+
benchmark_sweep_params:
20+
- {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
21+
- {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
22+
trace_dir: "../microbenchmarks/all_to_all_tpu7x_2x4x4"
23+
csv_path: "../microbenchmarks/all_to_all_tpu7x_2x4x4"
24+
xlml_metrics_dir: "../microbenchmarks/all_to_all_tpu7x_2x4x4"
25+
xla_dump_dir: "../microbenchmarks/all_to_all_tpu7x_2x4x4/hlo_graphs"
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
benchmarks:
2+
- benchmark_name: all_gather
3+
benchmark_sweep_params:
4+
- {matrix_dim_range: {start: 64, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "16x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5+
- {matrix_dim_range: {start: 64, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "16x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
6+
trace_dir: "../microbenchmarks/all_gather_tpu7x_4x4x4"
7+
csv_path: "../microbenchmarks/all_gather_tpu7x_4x4x4"
8+
xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_4x4x4"
9+
xla_dump_dir: "../microbenchmarks/all_gather_tpu7x_4x4x4/hlo_graphs"
10+
- benchmark_name: psum
11+
benchmark_sweep_params:
12+
- {matrix_dim_range: {start: 64, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "16x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
13+
- {matrix_dim_range: {start: 64, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "16x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
14+
trace_dir: "../microbenchmarks/psum_tpu7x_4x4x4"
15+
csv_path: "../microbenchmarks/psum_tpu7x_4x4x4"
16+
xlml_metrics_dir: "../microbenchmarks/psum_tpu7x_4x4x4"
17+
xla_dump_dir: "../microbenchmarks/psum_tpu7x_4x4x4/hlo_graphs"
18+
- benchmark_name: all_to_all
19+
benchmark_sweep_params:
20+
- {matrix_dim_range: {start: 64, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "16x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
21+
- {matrix_dim_range: {start: 64, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "16x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
22+
trace_dir: "../microbenchmarks/all_to_all_tpu7x_4x4x4"
23+
csv_path: "../microbenchmarks/all_to_all_tpu7x_4x4x4"
24+
xlml_metrics_dir: "../microbenchmarks/all_to_all_tpu7x_4x4x4"
25+
xla_dump_dir: "../microbenchmarks/all_to_all_tpu7x_4x4x4/hlo_graphs"

Ironwood/src/benchmark_utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ def multiple_iteration_get_metrics_from_trace(trace: dict[str, Any], task: str =
193 193
marker_done_events = marker_call_done_events
194 194
unique_pids = set([e["pid"] for e in marker_done_events])
195 195
print(f"Unique PIDs: {unique_pids}")
196+
print("Stop!!!!!!!!!")
196 197
if not marker_done_events:
197 198
event_matcher = re.compile(task)
198 199

0 commit comments

Comments
 (0)