Skip to content

Commit e4bf080

Browse files
authored
Tpu7x for demo (#87)
* Create tpu7x-4x4x4-ici-all-gather-microbenchmark.yaml
* Update all_gather_tpu7x_4x4x4.yaml
* Create tpu7x-2x2x1-matmul-microbenchmark.yaml
* remove unnecessary runs in config files
1 parent 2c847e8 commit e4bf080

32 files changed

Lines changed: 119 additions & 63 deletions

Ironwood/configs/collectives/all_gather_1d.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
benchmarks:
22
- benchmark_name: all_gather
33
benchmark_sweep_params:
4-
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "1x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5-
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x8", ici_size_range: 128, sharding_strategy: "1x1x4", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
4+
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "1x4x1", op_dimension: 1, num_runs: 5}
65
trace_dir: "../microbenchmarks/all_gather_1d"
76
csv_path: "../microbenchmarks/all_gather_1d"
87
xlml_metrics_dir: "../microbenchmarks/all_gather_1d"

Ironwood/configs/collectives/all_gather_2d.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
benchmarks:
22
- benchmark_name: all_gather
33
benchmark_sweep_params:
4-
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "4x16x2", ici_size_range: 128, sharding_strategy: "1x16x1", op_dimension: 2, num_runs: 5} # Parallel Replica Groups
5-
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "4x32", ici_size_range: 128, sharding_strategy: "1x32", op_dimension: 2, num_runs: 5} # Non Parallel Replica Groups
4+
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "4x16x2", ici_size_range: 128, sharding_strategy: "1x16x1", op_dimension: 2, num_runs: 5}
65
trace_dir: "../microbenchmarks/all_gather_2d"
76
csv_path: "../microbenchmarks/all_gather_2d"
87
xlml_metrics_dir: "../microbenchmarks/all_gather_2d"

Ironwood/configs/collectives/all_gather_3d.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
benchmarks:
22
- benchmark_name: all_gather
33
benchmark_sweep_params:
4-
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "64x2", ici_size_range: 128, sharding_strategy: "64x1", op_dimension: 3, num_runs: 5} # Parallel Replica Groups
5-
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x8", ici_size_range: 128, sharding_strategy: "4x4x8", op_dimension: 3, num_runs: 5} # Non Parallel Replica Groups
4+
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "64x2", ici_size_range: 128, sharding_strategy: "64x1", op_dimension: 3, num_runs: 5}
65
trace_dir: "../microbenchmarks/all_gather_3d"
76
csv_path: "../microbenchmarks/all_gather_3d"
87
xlml_metrics_dir: "../microbenchmarks/all_gather_3d"

Ironwood/configs/collectives/all_gather_demo.yaml

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,9 @@
11
benchmarks:
22
- benchmark_name: all_gather
33
benchmark_sweep_params:
4-
- {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "1x4x1", op_dimension: 1} # Parallel Replica
5-
- {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x8", ici_size_range: 128, sharding_strategy: "1x1x4", op_dimension: 1} # Non-Parallel Replica
6-
- {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "4x32", ici_size_range: 128, sharding_strategy: "1x32", op_dimension: 2} # Non Parallel Replica Groups
7-
- {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "4x16x2", ici_size_range: 128, sharding_strategy: "1x16x1", op_dimension: 2} # Parallel Replica Groups
8-
- {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "64x2", ici_size_range: 128, sharding_strategy: "64x1", op_dimension: 3} # Parallel Replica Groups
9-
- {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x8", ici_size_range: 128, sharding_strategy: "4x4x8", op_dimension: 3} # Non Parallel Replica Groups
10-
4+
- {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "1x4x1", op_dimension: 1}
5+
- {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "4x16x2", ici_size_range: 128, sharding_strategy: "1x16x1", op_dimension: 2}
6+
- {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "64x2", ici_size_range: 128, sharding_strategy: "64x1", op_dimension: 3}
117

128
warmup_tries: 10
139
trace_dir: "../microbenchmarks/all_gather_demo"

Ironwood/configs/collectives/all_gather_tpu7x_2x2x1.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
benchmarks:
22
- benchmark_name: all_gather
33
benchmark_sweep_params:
4-
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5-
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
4+
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x1", op_dimension: 1, num_runs: 5}
65
trace_dir: "../microbenchmarks/all_gather_tpu7x_2x2x1"
76
csv_path: "../microbenchmarks/all_gather_tpu7x_2x2x1"
87
xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_2x2x1"

Ironwood/configs/collectives/all_gather_tpu7x_2x2x2.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
benchmarks:
22
- benchmark_name: all_gather
33
benchmark_sweep_params:
4-
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5-
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
4+
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x1", op_dimension: 1, num_runs: 5}
65
trace_dir: "../microbenchmarks/all_gather_tpu7x_2x2x2"
76
csv_path: "../microbenchmarks/all_gather_tpu7x_2x2x2"
87
xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_2x2x2"

Ironwood/configs/collectives/all_gather_tpu7x_2x2x4.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
benchmarks:
22
- benchmark_name: all_gather
33
benchmark_sweep_params:
4-
- {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5-
- {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
4+
- {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x1", op_dimension: 1, num_runs: 5}
65
trace_dir: "../microbenchmarks/all_gather_tpu7x_2x2x4"
76
csv_path: "../microbenchmarks/all_gather_tpu7x_2x2x4"
87
xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_2x2x4"

Ironwood/configs/collectives/all_gather_tpu7x_2x4x4.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
benchmarks:
22
- benchmark_name: all_gather
33
benchmark_sweep_params:
4-
- {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5-
- {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
4+
- {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x1", op_dimension: 1, num_runs: 5}
65
trace_dir: "../microbenchmarks/all_gather_tpu7x_2x4x4"
76
csv_path: "../microbenchmarks/all_gather_tpu7x_2x4x4"
87
xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_2x4x4"

Ironwood/configs/collectives/all_gather_tpu7x_4x4x4.yaml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
benchmarks:
22
- benchmark_name: all_gather
33
benchmark_sweep_params:
4-
- {matrix_dim_range: {start: 64, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "16x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5-
- {matrix_dim_range: {start: 64, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "16x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
4+
- {matrix_dim_range: {start: 64, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "16x4x1", op_dimension: 1, num_runs: 10}
65
trace_dir: "../microbenchmarks/all_gather_tpu7x_4x4x4"
76
csv_path: "../microbenchmarks/all_gather_tpu7x_4x4x4"
87
xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_4x4x4"
9-
xla_dump_dir: "../microbenchmarks/all_gather_tpu7x_4x4x4/hlo_graphs"
8+
xla_dump_dir: "../microbenchmarks/all_gather_tpu7x_4x4x4/hlo_graphs"

Ironwood/configs/collectives/all_gather_tpu7x_4x4x8.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
benchmarks:
22
- benchmark_name: all_gather
33
benchmark_sweep_params:
4-
- {matrix_dim_range: {start: 128, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "32x4x2", ici_size_range: 256, sharding_strategy: "32x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5-
- {matrix_dim_range: {start: 128, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "32x4x2", ici_size_range: 256, sharding_strategy: "32x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
4+
- {matrix_dim_range: {start: 128, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "32x4x2", ici_size_range: 256, sharding_strategy: "32x4x1", op_dimension: 1, num_runs: 5}
65
trace_dir: "../microbenchmarks/all_gather_tpu7x_4x4x8"
76
csv_path: "../microbenchmarks/all_gather_tpu7x_4x4x8"
87
xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_4x4x8"

0 commit comments

Comments
 (0)