Skip to content

Commit 1d4baa3

Browse files
committed
update sharding dump
1 parent bca93db commit 1d4baa3

6 files changed

Lines changed: 30 additions & 72 deletions

File tree

tests/utils/sharding_info/deepseek2-16b/tpu7x-16/slice_1/rule_default/input_shardings.json

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -96,18 +96,6 @@
9696
"PartitionSpec": "P('fsdp', None, None, None)"
9797
}
9898
},
99-
{
100-
"attention_op/arr: int8[1,4,4]": {
101-
"logic_axes": "Unknown",
102-
"PartitionSpec": "P(None, None)"
103-
}
104-
},
105-
{
106-
"attention_op/arr: int32[2048]": {
107-
"logic_axes": "Unknown",
108-
"PartitionSpec": "P(None,)"
109-
}
110-
},
11199
{
112100
"attention_mla/out: bfloat16[192,2048,16,128]": {
113101
"logic_axes": "('activation_batch', 'activation_length', 'activation_heads', 'activation_kv')",

tests/utils/sharding_info/deepseek2-16b/tpu7x-16/slice_1/rule_pure-fsdp/input_shardings.json

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -96,18 +96,6 @@
9696
"PartitionSpec": "P('fsdp', None, None, None)"
9797
}
9898
},
99-
{
100-
"attention_op/arr: int8[1,4,4]": {
101-
"logic_axes": "Unknown",
102-
"PartitionSpec": "P(None, None)"
103-
}
104-
},
105-
{
106-
"attention_op/arr: int32[2048]": {
107-
"logic_axes": "Unknown",
108-
"PartitionSpec": "P(None,)"
109-
}
110-
},
11199
{
112100
"attention_mla/out: bfloat16[192,2048,16,128]": {
113101
"logic_axes": "('activation_batch', 'activation_length', 'activation_heads', 'activation_kv')",

tests/utils/sharding_info/deepseek2-16b/tpu7x-8/slice_1/rule_ep-as-cp_ici_fsdp_parallelism=-1_ici_expert_parallelism=2/input_shardings.json

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,18 @@
6666
"PartitionSpec": "P('fsdp', 'expert', None, None)"
6767
}
6868
},
69+
{
70+
"attention_op/arr: int8[1,4,4]": {
71+
"logic_axes": "Unknown",
72+
"PartitionSpec": "P(None, 'expert')"
73+
}
74+
},
75+
{
76+
"attention_op/arr: int32[2048]": {
77+
"logic_axes": "Unknown",
78+
"PartitionSpec": "P('expert',)"
79+
}
80+
},
6981
{
7082
"attention_op/query: bfloat16[96,16,2048,192]": {
7183
"logic_axes": "Unknown",
@@ -132,6 +144,24 @@
132144
"PartitionSpec": "P('fsdp', 'expert', None)"
133145
}
134146
},
147+
{
148+
"moe/w0_kernel: bfloat16[64,2048,1408]": {
149+
"logic_axes": "Unknown",
150+
"PartitionSpec": "P('expert', None, None)"
151+
}
152+
},
153+
{
154+
"moe/w1_kernel: bfloat16[64,2048,1408]": {
155+
"logic_axes": "Unknown",
156+
"PartitionSpec": "P('expert', None, None)"
157+
}
158+
},
159+
{
160+
"moe/wo_kernel: bfloat16[64,1408,2048]": {
161+
"logic_axes": "Unknown",
162+
"PartitionSpec": "P('expert', None, None)"
163+
}
164+
},
135165
{
136166
"linears/x: bfloat16[96,2048,2816]": {
137167
"logic_axes": "('activation_batch', 'activation_length', 'activation_mlp')",

tests/utils/sharding_info/gpt-oss-20b/tpu7x-16/slice_1/rule_default/input_shardings.json

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -66,24 +66,6 @@
6666
"PartitionSpec": "P('fsdp', None, None, None)"
6767
}
6868
},
69-
{
70-
"attention_op/arr: int8[1,4,2]": {
71-
"logic_axes": "Unknown",
72-
"PartitionSpec": "P(None, None)"
73-
}
74-
},
75-
{
76-
"attention_op/arr: bool[2,512,512]": {
77-
"logic_axes": "Unknown",
78-
"PartitionSpec": "P()"
79-
}
80-
},
81-
{
82-
"attention_op/arr: int8[1,2,4]": {
83-
"logic_axes": "Unknown",
84-
"PartitionSpec": "P(None, None)"
85-
}
86-
},
8769
{
8870
"attentions/out: bfloat16[192,2048,64,64]": {
8971
"logic_axes": "('activation_batch', 'activation_attn_length', 'activation_heads', 'activation_kv')",

tests/utils/sharding_info/gpt-oss-20b/tpu7x-16/slice_1/rule_default_ici_fsdp_parallelism=-1_ici_expert_parallelism=2/input_shardings.json

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -66,24 +66,6 @@
6666
"PartitionSpec": "P(('fsdp', 'expert'), None, None, None)"
6767
}
6868
},
69-
{
70-
"attention_op/arr: int8[1,4,2]": {
71-
"logic_axes": "Unknown",
72-
"PartitionSpec": "P(None, None)"
73-
}
74-
},
75-
{
76-
"attention_op/arr: bool[2,512,512]": {
77-
"logic_axes": "Unknown",
78-
"PartitionSpec": "P()"
79-
}
80-
},
81-
{
82-
"attention_op/arr: int8[1,2,4]": {
83-
"logic_axes": "Unknown",
84-
"PartitionSpec": "P(None, None)"
85-
}
86-
},
8769
{
8870
"attentions/out: bfloat16[192,2048,64,64]": {
8971
"logic_axes": "('activation_batch', 'activation_attn_length', 'activation_heads', 'activation_kv')",

tests/utils/sharding_info/qwen3-0.6b/tpu7x-16/slice_1/rule_default/input_shardings.json

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -60,18 +60,6 @@
6060
"PartitionSpec": "P('fsdp', None, None, None)"
6161
}
6262
},
63-
{
64-
"attention_op/arr: int8[1,4,4]": {
65-
"logic_axes": "Unknown",
66-
"PartitionSpec": "P(None, None)"
67-
}
68-
},
69-
{
70-
"attention_op/arr: int32[2048]": {
71-
"logic_axes": "Unknown",
72-
"PartitionSpec": "P(None,)"
73-
}
74-
},
7563
{
7664
"attentions/out: bfloat16[192,2048,16,128]": {
7765
"logic_axes": "('activation_batch', 'activation_attn_length', 'activation_heads', 'activation_kv')",

0 commit comments

Comments
 (0)