|
1 | 1 | benchmarks: |
2 | 2 | - benchmark_name: all_gather |
3 | 3 | benchmark_sweep_params: |
4 | | - - {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "1x4x1", op_dimension: 1} # Parallel Replica |
5 | | - - {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x8", ici_size_range: 128, sharding_strategy: "1x1x4", op_dimension: 1} # Non-Parallel Replica |
6 | | - - {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "4x32", ici_size_range: 128, sharding_strategy: "1x32", op_dimension: 2} # Non Parallel Replica Groups |
7 | | - - {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "4x16x2", ici_size_range: 128, sharding_strategy: "1x16x1", op_dimension: 2} # Parallel Replica Groups |
8 | | - - {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "64x2", ici_size_range: 128, sharding_strategy: "64x1", op_dimension: 3} # Parallel Replica Groups |
9 | | - - {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x8", ici_size_range: 128, sharding_strategy: "4x4x8", op_dimension: 3} # Non Parallel Replica Groups |
10 | | - |
| 4 | + - {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "1x4x1", op_dimension: 1} |
| 5 | + - {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "4x16x2", ici_size_range: 128, sharding_strategy: "1x16x1", op_dimension: 2} |
| 6 | + - {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "64x2", ici_size_range: 128, sharding_strategy: "64x1", op_dimension: 3} |
11 | 7 |
|
12 | 8 | warmup_tries: 10 |
13 | 9 | trace_dir: "../microbenchmarks/all_gather_demo" |
|
0 commit comments