diff --git a/Ironwood/configs/collectives/all_gather_tpu7x_2x2x1.yaml b/Ironwood/configs/collectives/all_gather_tpu7x_2x2x1.yaml index a4723209..9bc586a1 100644 --- a/Ironwood/configs/collectives/all_gather_tpu7x_2x2x1.yaml +++ b/Ironwood/configs/collectives/all_gather_tpu7x_2x2x1.yaml @@ -1,11 +1,9 @@ benchmarks: - benchmark_name: all_gather benchmark_sweep_params: - - {matrix_dim_range: {start: 64, end: 16384, multiplier: 4}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica - - {matrix_dim_range: {start: 64, end: 16384, multiplier: 4}, dtype: "float32", mesh_shape: "1x8", ici_size_range: 8, sharding_strategy: "1x8", op_dimension: 1, num_runs: 5} # Non-Parallel Replica - - warmup_tries: 10 - trace_dir: "../microbenchmarks/all_gather" - csv_path: "../microbenchmarks/all_gather" - xlml_metrics_dir: "../microbenchmarks/all_gather" - xla_dump_dir: "../microbenchmarks/all_gather/hlo_graphs" \ No newline at end of file + - {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica + - {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica + trace_dir: "../microbenchmarks/all_gather_tpu7x_2x2x1" + csv_path: "../microbenchmarks/all_gather_tpu7x_2x2x1" + xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_2x2x1" + xla_dump_dir: "../microbenchmarks/all_gather_tpu7x_2x2x1/hlo_graphs" \ No newline at end of file diff --git a/Ironwood/configs/collectives/all_gather_tpu7x_2x2x2.yaml b/Ironwood/configs/collectives/all_gather_tpu7x_2x2x2.yaml index 97d74508..b5be0c8d 100644 --- a/Ironwood/configs/collectives/all_gather_tpu7x_2x2x2.yaml +++ b/Ironwood/configs/collectives/all_gather_tpu7x_2x2x2.yaml @@ -1,11 +1,9 @@ benchmarks: - benchmark_name: all_gather benchmark_sweep_params: - - {matrix_dim_range: {start: 64, end: 16384, multiplier: 4}, dtype: "float32", mesh_shape: "2x4x2", ici_size_range: 16, sharding_strategy: "1x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica - - {matrix_dim_range: {start: 64, end: 16384, multiplier: 4}, dtype: "float32", mesh_shape: "2x2x4", ici_size_range: 16, sharding_strategy: "1x2x4", op_dimension: 1, num_runs: 5} # Non-Parallel Replica - - warmup_tries: 10 - trace_dir: "../microbenchmarks/all_gather" - csv_path: "../microbenchmarks/all_gather" - xlml_metrics_dir: "../microbenchmarks/all_gather" - xla_dump_dir: "../microbenchmarks/all_gather/hlo_graphs" \ No newline at end of file + - {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica + - {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica + trace_dir: "../microbenchmarks/all_gather_tpu7x_2x2x2" + csv_path: "../microbenchmarks/all_gather_tpu7x_2x2x2" + xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_2x2x2" + xla_dump_dir: "../microbenchmarks/all_gather_tpu7x_2x2x2/hlo_graphs" \ No newline at end of file diff --git a/Ironwood/configs/collectives/all_gather_tpu7x_2x2x4.yaml b/Ironwood/configs/collectives/all_gather_tpu7x_2x2x4.yaml new file mode 100644 index 00000000..09b02979 --- /dev/null +++ b/Ironwood/configs/collectives/all_gather_tpu7x_2x2x4.yaml @@ -0,0 +1,9 @@ +benchmarks: +- benchmark_name: all_gather + benchmark_sweep_params: + - {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica + - {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica + trace_dir: "../microbenchmarks/all_gather_tpu7x_2x2x4" + csv_path: "../microbenchmarks/all_gather_tpu7x_2x2x4" + xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_2x2x4" + xla_dump_dir: "../microbenchmarks/all_gather_tpu7x_2x2x4/hlo_graphs" \ No newline at end of file diff --git a/Ironwood/configs/collectives/all_gather_tpu7x_2x4x4.yaml b/Ironwood/configs/collectives/all_gather_tpu7x_2x4x4.yaml new file mode 100644 index 00000000..4f6cf11a --- /dev/null +++ b/Ironwood/configs/collectives/all_gather_tpu7x_2x4x4.yaml @@ -0,0 +1,9 @@ +benchmarks: +- benchmark_name: all_gather + benchmark_sweep_params: + - {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica + - {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica + trace_dir: "../microbenchmarks/all_gather_tpu7x_2x4x4" + csv_path: "../microbenchmarks/all_gather_tpu7x_2x4x4" + xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_2x4x4" + xla_dump_dir: "../microbenchmarks/all_gather_tpu7x_2x4x4/hlo_graphs" \ No newline at end of file diff --git a/Ironwood/configs/collectives/all_gather_tpu7x_4x4x4.yaml b/Ironwood/configs/collectives/all_gather_tpu7x_4x4x4.yaml new file mode 100644 index 00000000..77f3ed13 --- /dev/null +++ b/Ironwood/configs/collectives/all_gather_tpu7x_4x4x4.yaml @@ -0,0 +1,9 @@ +benchmarks: +- benchmark_name: all_gather + benchmark_sweep_params: + - {matrix_dim_range: {start: 64, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "16x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica + - {matrix_dim_range: {start: 64, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "16x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica + trace_dir: "../microbenchmarks/all_gather_tpu7x_4x4x4" + csv_path: "../microbenchmarks/all_gather_tpu7x_4x4x4" + xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_4x4x4" + xla_dump_dir: "../microbenchmarks/all_gather_tpu7x_4x4x4/hlo_graphs" \ No newline at end of file diff --git a/Ironwood/configs/collectives/all_gather_tpu7x_4x4x8.yaml b/Ironwood/configs/collectives/all_gather_tpu7x_4x4x8.yaml new file mode 100644 index 00000000..12743d61 --- /dev/null +++ b/Ironwood/configs/collectives/all_gather_tpu7x_4x4x8.yaml @@ -0,0 +1,9 @@ +benchmarks: +- benchmark_name: all_gather + benchmark_sweep_params: + - {matrix_dim_range: {start: 128, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "32x4x2", ici_size_range: 256, sharding_strategy: "32x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica + - {matrix_dim_range: {start: 128, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "32x4x2", ici_size_range: 256, sharding_strategy: "32x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica + trace_dir: "../microbenchmarks/all_gather_tpu7x_4x4x8" + csv_path: "../microbenchmarks/all_gather_tpu7x_4x4x8" + xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_4x4x8" + xla_dump_dir: "../microbenchmarks/all_gather_tpu7x_4x4x8/hlo_graphs" \ No newline at end of file diff --git a/Ironwood/configs/collectives/all_reduce_tpu7x_2x2x1.yaml b/Ironwood/configs/collectives/all_reduce_tpu7x_2x2x1.yaml new file mode 100644 index 00000000..f7389925 --- /dev/null +++ b/Ironwood/configs/collectives/all_reduce_tpu7x_2x2x1.yaml @@ -0,0 +1,9 @@ +benchmarks: +- benchmark_name: psum + benchmark_sweep_params: + - {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica + - {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica + trace_dir: "../microbenchmarks/psum_tpu7x_2x2x1" + csv_path: "../microbenchmarks/psum_tpu7x_2x2x1" + xlml_metrics_dir: "../microbenchmarks/psum_tpu7x_2x2x1" + xla_dump_dir: "../microbenchmarks/psum_tpu7x_2x2x1/hlo_graphs" \ No newline at end of file diff --git a/Ironwood/configs/collectives/all_reduce_tpu7x_2x2x2.yaml b/Ironwood/configs/collectives/all_reduce_tpu7x_2x2x2.yaml new file mode 100644 index 00000000..b2cb202c --- /dev/null +++ b/Ironwood/configs/collectives/all_reduce_tpu7x_2x2x2.yaml @@ -0,0 +1,9 @@ +benchmarks: +- benchmark_name: psum + benchmark_sweep_params: + - {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica + - {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica + trace_dir: "../microbenchmarks/psum_tpu7x_2x2x2" + csv_path: "../microbenchmarks/psum_tpu7x_2x2x2" + xlml_metrics_dir: "../microbenchmarks/psum_tpu7x_2x2x2" + xla_dump_dir: "../microbenchmarks/psum_tpu7x_2x2x2/hlo_graphs" \ No newline at end of file diff --git a/Ironwood/configs/collectives/all_reduce_tpu7x_2x2x4.yaml b/Ironwood/configs/collectives/all_reduce_tpu7x_2x2x4.yaml new file mode 100644 index 00000000..946fd5ed --- /dev/null +++ b/Ironwood/configs/collectives/all_reduce_tpu7x_2x2x4.yaml @@ -0,0 +1,9 @@ +benchmarks: +- benchmark_name: psum + benchmark_sweep_params: + - {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica + - {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica + trace_dir: "../microbenchmarks/psum_tpu7x_2x2x4" + csv_path: "../microbenchmarks/psum_tpu7x_2x2x4" + xlml_metrics_dir: "../microbenchmarks/psum_tpu7x_2x2x4" + xla_dump_dir: "../microbenchmarks/psum_tpu7x_2x2x4/hlo_graphs" \ No newline at end of file diff --git a/Ironwood/configs/collectives/all_reduce_tpu7x_2x4x4.yaml b/Ironwood/configs/collectives/all_reduce_tpu7x_2x4x4.yaml new file mode 100644 index 00000000..613717cf --- /dev/null +++ b/Ironwood/configs/collectives/all_reduce_tpu7x_2x4x4.yaml @@ -0,0 +1,9 @@ +benchmarks: +- benchmark_name: psum + benchmark_sweep_params: + - {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica + - {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica + trace_dir: "../microbenchmarks/psum_tpu7x_2x4x4" + csv_path: "../microbenchmarks/psum_tpu7x_2x4x4" + xlml_metrics_dir: "../microbenchmarks/psum_tpu7x_2x4x4" + xla_dump_dir: "../microbenchmarks/psum_tpu7x_2x4x4/hlo_graphs" \ No newline at end of file diff --git a/Ironwood/configs/collectives/all_reduce_tpu7x_4x4x4.yaml b/Ironwood/configs/collectives/all_reduce_tpu7x_4x4x4.yaml new file mode 100644 index 00000000..3f4822c0 --- /dev/null +++ b/Ironwood/configs/collectives/all_reduce_tpu7x_4x4x4.yaml @@ -0,0 +1,9 @@ +benchmarks: +- benchmark_name: psum + benchmark_sweep_params: + - {matrix_dim_range: {start: 64, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "16x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica + - {matrix_dim_range: {start: 64, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "16x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica + trace_dir: "../microbenchmarks/psum_tpu7x_4x4x4" + csv_path: "../microbenchmarks/psum_tpu7x_4x4x4" + xlml_metrics_dir: "../microbenchmarks/psum_tpu7x_4x4x4" + xla_dump_dir: "../microbenchmarks/psum_tpu7x_4x4x4/hlo_graphs" \ No newline at end of file diff --git a/Ironwood/configs/collectives/all_reduce_tpu7x_4x4x8.yaml b/Ironwood/configs/collectives/all_reduce_tpu7x_4x4x8.yaml new file mode 100644 index 00000000..a14bbfe8 --- /dev/null +++ b/Ironwood/configs/collectives/all_reduce_tpu7x_4x4x8.yaml @@ -0,0 +1,9 @@ +benchmarks: +- benchmark_name: psum + benchmark_sweep_params: + - {matrix_dim_range: {start: 128, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "32x4x2", ici_size_range: 256, sharding_strategy: "32x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica + - {matrix_dim_range: {start: 128, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "32x4x2", ici_size_range: 256, sharding_strategy: "32x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica + trace_dir: "../microbenchmarks/psum_tpu7x_4x4x8" + csv_path: "../microbenchmarks/psum_tpu7x_4x4x8" + xlml_metrics_dir: "../microbenchmarks/psum_tpu7x_4x4x8" + xla_dump_dir: "../microbenchmarks/psum_tpu7x_4x4x8/hlo_graphs" \ No newline at end of file diff --git a/Ironwood/configs/collectives/all_to_all_tpu7x_2x2x1.yaml b/Ironwood/configs/collectives/all_to_all_tpu7x_2x2x1.yaml new file mode 100644 index 00000000..96da2c38 --- /dev/null +++ b/Ironwood/configs/collectives/all_to_all_tpu7x_2x2x1.yaml @@ -0,0 +1,9 @@ +benchmarks: +- benchmark_name: all_to_all + benchmark_sweep_params: + - {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica + - {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica + trace_dir: "../microbenchmarks/all_to_all_tpu7x_2x2x1" + csv_path: "../microbenchmarks/all_to_all_tpu7x_2x2x1" + xlml_metrics_dir: "../microbenchmarks/all_to_all_tpu7x_2x2x1" + xla_dump_dir: "../microbenchmarks/all_to_all_tpu7x_2x2x1/hlo_graphs" \ No newline at end of file diff --git a/Ironwood/configs/collectives/all_to_all_tpu7x_2x2x2.yaml b/Ironwood/configs/collectives/all_to_all_tpu7x_2x2x2.yaml new file mode 100644 index 00000000..388a4468 --- /dev/null +++ b/Ironwood/configs/collectives/all_to_all_tpu7x_2x2x2.yaml @@ -0,0 +1,9 @@ +benchmarks: +- benchmark_name: all_to_all + benchmark_sweep_params: + - {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica + - {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica + trace_dir: "../microbenchmarks/all_to_all_tpu7x_2x2x2" + csv_path: "../microbenchmarks/all_to_all_tpu7x_2x2x2" + xlml_metrics_dir: "../microbenchmarks/all_to_all_tpu7x_2x2x2" + xla_dump_dir: "../microbenchmarks/all_to_all_tpu7x_2x2x2/hlo_graphs" \ No newline at end of file diff --git a/Ironwood/configs/collectives/all_to_all_tpu7x_2x2x4.yaml b/Ironwood/configs/collectives/all_to_all_tpu7x_2x2x4.yaml new file mode 100644 index 00000000..e0cc48c9 --- /dev/null +++ b/Ironwood/configs/collectives/all_to_all_tpu7x_2x2x4.yaml @@ -0,0 +1,9 @@ +benchmarks: +- benchmark_name: all_to_all + benchmark_sweep_params: + - {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica + - {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica + trace_dir: "../microbenchmarks/all_to_all_tpu7x_2x2x4" + csv_path: "../microbenchmarks/all_to_all_tpu7x_2x2x4" + xlml_metrics_dir: "../microbenchmarks/all_to_all_tpu7x_2x2x4" + xla_dump_dir: "../microbenchmarks/all_to_all_tpu7x_2x2x4/hlo_graphs" \ No newline at end of file diff --git a/Ironwood/configs/collectives/all_to_all_tpu7x_2x4x4.yaml b/Ironwood/configs/collectives/all_to_all_tpu7x_2x4x4.yaml new file mode 100644 index 00000000..5ae19b6e --- /dev/null +++ b/Ironwood/configs/collectives/all_to_all_tpu7x_2x4x4.yaml @@ -0,0 +1,9 @@ +benchmarks: +- benchmark_name: all_to_all + benchmark_sweep_params: + - {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica + - {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica + trace_dir: "../microbenchmarks/all_to_all_tpu7x_2x4x4" + csv_path: "../microbenchmarks/all_to_all_tpu7x_2x4x4" + xlml_metrics_dir: "../microbenchmarks/all_to_all_tpu7x_2x4x4" + xla_dump_dir: "../microbenchmarks/all_to_all_tpu7x_2x4x4/hlo_graphs" \ No newline at end of file diff --git a/Ironwood/configs/collectives/all_to_all_tpu7x_4x4x4.yaml b/Ironwood/configs/collectives/all_to_all_tpu7x_4x4x4.yaml new file mode 100644 index 00000000..4cc8f6bb --- /dev/null +++ b/Ironwood/configs/collectives/all_to_all_tpu7x_4x4x4.yaml @@ -0,0 +1,9 @@ +benchmarks: +- benchmark_name: all_to_all + benchmark_sweep_params: + - {matrix_dim_range: {start: 64, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "16x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica + - {matrix_dim_range: {start: 64, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "16x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica + trace_dir: "../microbenchmarks/all_to_all_tpu7x_4x4x4" + csv_path: "../microbenchmarks/all_to_all_tpu7x_4x4x4" + xlml_metrics_dir: "../microbenchmarks/all_to_all_tpu7x_4x4x4" + xla_dump_dir: "../microbenchmarks/all_to_all_tpu7x_4x4x4/hlo_graphs" \ No newline at end of file diff --git a/Ironwood/configs/collectives/all_to_all_tpu7x_4x4x8.yaml b/Ironwood/configs/collectives/all_to_all_tpu7x_4x4x8.yaml new file mode 100644 index 00000000..212cd92d --- /dev/null +++ b/Ironwood/configs/collectives/all_to_all_tpu7x_4x4x8.yaml @@ -0,0 +1,9 @@ +benchmarks: +- benchmark_name: all_to_all + benchmark_sweep_params: + - {matrix_dim_range: {start: 128, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "32x4x2", ici_size_range: 256, sharding_strategy: "32x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica + - {matrix_dim_range: {start: 128, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "32x4x2", ici_size_range: 256, sharding_strategy: "32x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica + trace_dir: "../microbenchmarks/all_to_all_tpu7x_4x4x8" + csv_path: "../microbenchmarks/all_to_all_tpu7x_4x4x8" + xlml_metrics_dir: "../microbenchmarks/all_to_all_tpu7x_4x4x8" + xla_dump_dir: "../microbenchmarks/all_to_all_tpu7x_4x4x8/hlo_graphs" \ No newline at end of file