Skip to content

Commit f5ee4c3

Browse files
authored
tune triton-fp8w8a8g64 and triton-fp8w8a8g128 in 4090D (#1216)
1 parent a7d5e95 commit f5ee4c3

10 files changed

+740
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
{
2+
"1": {
3+
"BLOCK_K": 128,
4+
"BLOCK_M": 8,
5+
"BLOCK_N": 64,
6+
"GROUP_M": 8,
7+
"num_stages": 6,
8+
"num_warps": 4
9+
},
10+
"100": {
11+
"BLOCK_K": 128,
12+
"BLOCK_M": 64,
13+
"BLOCK_N": 128,
14+
"GROUP_M": 8,
15+
"num_stages": 4,
16+
"num_warps": 8
17+
},
18+
"1024": {
19+
"BLOCK_K": 128,
20+
"BLOCK_M": 64,
21+
"BLOCK_N": 64,
22+
"GROUP_M": 8,
23+
"num_stages": 3,
24+
"num_warps": 4
25+
},
26+
"128": {
27+
"BLOCK_K": 128,
28+
"BLOCK_M": 64,
29+
"BLOCK_N": 64,
30+
"GROUP_M": 8,
31+
"num_stages": 4,
32+
"num_warps": 4
33+
},
34+
"16": {
35+
"BLOCK_K": 128,
36+
"BLOCK_M": 16,
37+
"BLOCK_N": 64,
38+
"GROUP_M": 8,
39+
"num_stages": 4,
40+
"num_warps": 4
41+
},
42+
"256": {
43+
"BLOCK_K": 128,
44+
"BLOCK_M": 64,
45+
"BLOCK_N": 64,
46+
"GROUP_M": 8,
47+
"num_stages": 3,
48+
"num_warps": 4
49+
},
50+
"32": {
51+
"BLOCK_K": 128,
52+
"BLOCK_M": 32,
53+
"BLOCK_N": 64,
54+
"GROUP_M": 8,
55+
"num_stages": 4,
56+
"num_warps": 8
57+
},
58+
"64": {
59+
"BLOCK_K": 128,
60+
"BLOCK_M": 64,
61+
"BLOCK_N": 64,
62+
"GROUP_M": 8,
63+
"num_stages": 3,
64+
"num_warps": 4
65+
},
66+
"8": {
67+
"BLOCK_K": 128,
68+
"BLOCK_M": 8,
69+
"BLOCK_N": 64,
70+
"GROUP_M": 8,
71+
"num_stages": 5,
72+
"num_warps": 4
73+
}
74+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
{
2+
"1": {
3+
"BLOCK_K": 128,
4+
"BLOCK_M": 8,
5+
"BLOCK_N": 64,
6+
"GROUP_M": 8,
7+
"num_stages": 4,
8+
"num_warps": 4
9+
},
10+
"100": {
11+
"BLOCK_K": 128,
12+
"BLOCK_M": 64,
13+
"BLOCK_N": 128,
14+
"GROUP_M": 8,
15+
"num_stages": 4,
16+
"num_warps": 8
17+
},
18+
"1024": {
19+
"BLOCK_K": 128,
20+
"BLOCK_M": 64,
21+
"BLOCK_N": 64,
22+
"GROUP_M": 8,
23+
"num_stages": 3,
24+
"num_warps": 4
25+
},
26+
"128": {
27+
"BLOCK_K": 128,
28+
"BLOCK_M": 64,
29+
"BLOCK_N": 64,
30+
"GROUP_M": 8,
31+
"num_stages": 3,
32+
"num_warps": 4
33+
},
34+
"16": {
35+
"BLOCK_K": 128,
36+
"BLOCK_M": 16,
37+
"BLOCK_N": 64,
38+
"GROUP_M": 8,
39+
"num_stages": 4,
40+
"num_warps": 4
41+
},
42+
"256": {
43+
"BLOCK_K": 128,
44+
"BLOCK_M": 64,
45+
"BLOCK_N": 64,
46+
"GROUP_M": 8,
47+
"num_stages": 3,
48+
"num_warps": 4
49+
},
50+
"32": {
51+
"BLOCK_K": 128,
52+
"BLOCK_M": 32,
53+
"BLOCK_N": 64,
54+
"GROUP_M": 8,
55+
"num_stages": 4,
56+
"num_warps": 8
57+
},
58+
"64": {
59+
"BLOCK_K": 128,
60+
"BLOCK_M": 32,
61+
"BLOCK_N": 64,
62+
"GROUP_M": 8,
63+
"num_stages": 3,
64+
"num_warps": 2
65+
},
66+
"8": {
67+
"BLOCK_K": 128,
68+
"BLOCK_M": 8,
69+
"BLOCK_N": 64,
70+
"GROUP_M": 8,
71+
"num_stages": 5,
72+
"num_warps": 4
73+
}
74+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
{
2+
"1": {
3+
"BLOCK_K": 128,
4+
"BLOCK_M": 8,
5+
"BLOCK_N": 64,
6+
"GROUP_M": 8,
7+
"num_stages": 4,
8+
"num_warps": 4
9+
},
10+
"100": {
11+
"BLOCK_K": 128,
12+
"BLOCK_M": 64,
13+
"BLOCK_N": 128,
14+
"GROUP_M": 8,
15+
"num_stages": 3,
16+
"num_warps": 8
17+
},
18+
"1024": {
19+
"BLOCK_K": 128,
20+
"BLOCK_M": 64,
21+
"BLOCK_N": 64,
22+
"GROUP_M": 8,
23+
"num_stages": 3,
24+
"num_warps": 4
25+
},
26+
"128": {
27+
"BLOCK_K": 128,
28+
"BLOCK_M": 32,
29+
"BLOCK_N": 64,
30+
"GROUP_M": 8,
31+
"num_stages": 3,
32+
"num_warps": 8
33+
},
34+
"16": {
35+
"BLOCK_K": 128,
36+
"BLOCK_M": 16,
37+
"BLOCK_N": 64,
38+
"GROUP_M": 8,
39+
"num_stages": 4,
40+
"num_warps": 4
41+
},
42+
"256": {
43+
"BLOCK_K": 128,
44+
"BLOCK_M": 64,
45+
"BLOCK_N": 64,
46+
"GROUP_M": 8,
47+
"num_stages": 3,
48+
"num_warps": 4
49+
},
50+
"32": {
51+
"BLOCK_K": 128,
52+
"BLOCK_M": 32,
53+
"BLOCK_N": 64,
54+
"GROUP_M": 8,
55+
"num_stages": 3,
56+
"num_warps": 8
57+
},
58+
"64": {
59+
"BLOCK_K": 128,
60+
"BLOCK_M": 64,
61+
"BLOCK_N": 64,
62+
"GROUP_M": 8,
63+
"num_stages": 3,
64+
"num_warps": 4
65+
},
66+
"8": {
67+
"BLOCK_K": 128,
68+
"BLOCK_M": 8,
69+
"BLOCK_N": 64,
70+
"GROUP_M": 8,
71+
"num_stages": 4,
72+
"num_warps": 4
73+
}
74+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
{
2+
"1": {
3+
"BLOCK_K": 128,
4+
"BLOCK_M": 8,
5+
"BLOCK_N": 64,
6+
"GROUP_M": 8,
7+
"num_stages": 4,
8+
"num_warps": 4
9+
},
10+
"100": {
11+
"BLOCK_K": 128,
12+
"BLOCK_M": 64,
13+
"BLOCK_N": 128,
14+
"GROUP_M": 8,
15+
"num_stages": 3,
16+
"num_warps": 8
17+
},
18+
"1024": {
19+
"BLOCK_K": 128,
20+
"BLOCK_M": 64,
21+
"BLOCK_N": 64,
22+
"GROUP_M": 8,
23+
"num_stages": 3,
24+
"num_warps": 4
25+
},
26+
"128": {
27+
"BLOCK_K": 128,
28+
"BLOCK_M": 32,
29+
"BLOCK_N": 64,
30+
"GROUP_M": 8,
31+
"num_stages": 3,
32+
"num_warps": 8
33+
},
34+
"16": {
35+
"BLOCK_K": 128,
36+
"BLOCK_M": 16,
37+
"BLOCK_N": 64,
38+
"GROUP_M": 8,
39+
"num_stages": 5,
40+
"num_warps": 4
41+
},
42+
"256": {
43+
"BLOCK_K": 128,
44+
"BLOCK_M": 64,
45+
"BLOCK_N": 64,
46+
"GROUP_M": 8,
47+
"num_stages": 3,
48+
"num_warps": 4
49+
},
50+
"32": {
51+
"BLOCK_K": 128,
52+
"BLOCK_M": 32,
53+
"BLOCK_N": 64,
54+
"GROUP_M": 8,
55+
"num_stages": 3,
56+
"num_warps": 8
57+
},
58+
"64": {
59+
"BLOCK_K": 128,
60+
"BLOCK_M": 64,
61+
"BLOCK_N": 64,
62+
"GROUP_M": 8,
63+
"num_stages": 3,
64+
"num_warps": 4
65+
},
66+
"8": {
67+
"BLOCK_K": 128,
68+
"BLOCK_M": 8,
69+
"BLOCK_N": 64,
70+
"GROUP_M": 8,
71+
"num_stages": 3,
72+
"num_warps": 4
73+
}
74+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
{
2+
"1": {
3+
"BLOCK_K": 128,
4+
"BLOCK_M": 8,
5+
"BLOCK_N": 64,
6+
"GROUP_M": 8,
7+
"num_stages": 4,
8+
"num_warps": 4
9+
},
10+
"100": {
11+
"BLOCK_K": 128,
12+
"BLOCK_M": 16,
13+
"BLOCK_N": 128,
14+
"GROUP_M": 8,
15+
"num_stages": 3,
16+
"num_warps": 8
17+
},
18+
"1024": {
19+
"BLOCK_K": 128,
20+
"BLOCK_M": 64,
21+
"BLOCK_N": 64,
22+
"GROUP_M": 8,
23+
"num_stages": 3,
24+
"num_warps": 4
25+
},
26+
"128": {
27+
"BLOCK_K": 128,
28+
"BLOCK_M": 64,
29+
"BLOCK_N": 64,
30+
"GROUP_M": 8,
31+
"num_stages": 5,
32+
"num_warps": 4
33+
},
34+
"16": {
35+
"BLOCK_K": 128,
36+
"BLOCK_M": 8,
37+
"BLOCK_N": 64,
38+
"GROUP_M": 8,
39+
"num_stages": 4,
40+
"num_warps": 4
41+
},
42+
"256": {
43+
"BLOCK_K": 128,
44+
"BLOCK_M": 32,
45+
"BLOCK_N": 64,
46+
"GROUP_M": 8,
47+
"num_stages": 3,
48+
"num_warps": 4
49+
},
50+
"32": {
51+
"BLOCK_K": 128,
52+
"BLOCK_M": 16,
53+
"BLOCK_N": 64,
54+
"GROUP_M": 8,
55+
"num_stages": 3,
56+
"num_warps": 4
57+
},
58+
"64": {
59+
"BLOCK_K": 128,
60+
"BLOCK_M": 32,
61+
"BLOCK_N": 64,
62+
"GROUP_M": 8,
63+
"num_stages": 4,
64+
"num_warps": 8
65+
},
66+
"8": {
67+
"BLOCK_K": 128,
68+
"BLOCK_M": 8,
69+
"BLOCK_N": 64,
70+
"GROUP_M": 8,
71+
"num_stages": 5,
72+
"num_warps": 4
73+
}
74+
}

0 commit comments

Comments
 (0)