File tree Expand file tree Collapse file tree 10 files changed +740
-0
lines changed
lightllm/common/triton_utils/autotune_kernel_configs/triton_3.5.1/NVIDIA_GeForce_RTX_4090_D/scaled_mm_act_per_group_w_perchannel:v1 Expand file tree Collapse file tree 10 files changed +740
-0
lines changed Original file line number Diff line number Diff line change 1+ {
2+ "1" : {
3+ "BLOCK_K" : 128 ,
4+ "BLOCK_M" : 8 ,
5+ "BLOCK_N" : 64 ,
6+ "GROUP_M" : 8 ,
7+ "num_stages" : 6 ,
8+ "num_warps" : 4
9+ },
10+ "100" : {
11+ "BLOCK_K" : 128 ,
12+ "BLOCK_M" : 64 ,
13+ "BLOCK_N" : 128 ,
14+ "GROUP_M" : 8 ,
15+ "num_stages" : 4 ,
16+ "num_warps" : 8
17+ },
18+ "1024" : {
19+ "BLOCK_K" : 128 ,
20+ "BLOCK_M" : 64 ,
21+ "BLOCK_N" : 64 ,
22+ "GROUP_M" : 8 ,
23+ "num_stages" : 3 ,
24+ "num_warps" : 4
25+ },
26+ "128" : {
27+ "BLOCK_K" : 128 ,
28+ "BLOCK_M" : 64 ,
29+ "BLOCK_N" : 64 ,
30+ "GROUP_M" : 8 ,
31+ "num_stages" : 4 ,
32+ "num_warps" : 4
33+ },
34+ "16" : {
35+ "BLOCK_K" : 128 ,
36+ "BLOCK_M" : 16 ,
37+ "BLOCK_N" : 64 ,
38+ "GROUP_M" : 8 ,
39+ "num_stages" : 4 ,
40+ "num_warps" : 4
41+ },
42+ "256" : {
43+ "BLOCK_K" : 128 ,
44+ "BLOCK_M" : 64 ,
45+ "BLOCK_N" : 64 ,
46+ "GROUP_M" : 8 ,
47+ "num_stages" : 3 ,
48+ "num_warps" : 4
49+ },
50+ "32" : {
51+ "BLOCK_K" : 128 ,
52+ "BLOCK_M" : 32 ,
53+ "BLOCK_N" : 64 ,
54+ "GROUP_M" : 8 ,
55+ "num_stages" : 4 ,
56+ "num_warps" : 8
57+ },
58+ "64" : {
59+ "BLOCK_K" : 128 ,
60+ "BLOCK_M" : 64 ,
61+ "BLOCK_N" : 64 ,
62+ "GROUP_M" : 8 ,
63+ "num_stages" : 3 ,
64+ "num_warps" : 4
65+ },
66+ "8" : {
67+ "BLOCK_K" : 128 ,
68+ "BLOCK_M" : 8 ,
69+ "BLOCK_N" : 64 ,
70+ "GROUP_M" : 8 ,
71+ "num_stages" : 5 ,
72+ "num_warps" : 4
73+ }
74+ }
Original file line number Diff line number Diff line change 1+ {
2+ "1" : {
3+ "BLOCK_K" : 128 ,
4+ "BLOCK_M" : 8 ,
5+ "BLOCK_N" : 64 ,
6+ "GROUP_M" : 8 ,
7+ "num_stages" : 4 ,
8+ "num_warps" : 4
9+ },
10+ "100" : {
11+ "BLOCK_K" : 128 ,
12+ "BLOCK_M" : 64 ,
13+ "BLOCK_N" : 128 ,
14+ "GROUP_M" : 8 ,
15+ "num_stages" : 4 ,
16+ "num_warps" : 8
17+ },
18+ "1024" : {
19+ "BLOCK_K" : 128 ,
20+ "BLOCK_M" : 64 ,
21+ "BLOCK_N" : 64 ,
22+ "GROUP_M" : 8 ,
23+ "num_stages" : 3 ,
24+ "num_warps" : 4
25+ },
26+ "128" : {
27+ "BLOCK_K" : 128 ,
28+ "BLOCK_M" : 64 ,
29+ "BLOCK_N" : 64 ,
30+ "GROUP_M" : 8 ,
31+ "num_stages" : 3 ,
32+ "num_warps" : 4
33+ },
34+ "16" : {
35+ "BLOCK_K" : 128 ,
36+ "BLOCK_M" : 16 ,
37+ "BLOCK_N" : 64 ,
38+ "GROUP_M" : 8 ,
39+ "num_stages" : 4 ,
40+ "num_warps" : 4
41+ },
42+ "256" : {
43+ "BLOCK_K" : 128 ,
44+ "BLOCK_M" : 64 ,
45+ "BLOCK_N" : 64 ,
46+ "GROUP_M" : 8 ,
47+ "num_stages" : 3 ,
48+ "num_warps" : 4
49+ },
50+ "32" : {
51+ "BLOCK_K" : 128 ,
52+ "BLOCK_M" : 32 ,
53+ "BLOCK_N" : 64 ,
54+ "GROUP_M" : 8 ,
55+ "num_stages" : 4 ,
56+ "num_warps" : 8
57+ },
58+ "64" : {
59+ "BLOCK_K" : 128 ,
60+ "BLOCK_M" : 32 ,
61+ "BLOCK_N" : 64 ,
62+ "GROUP_M" : 8 ,
63+ "num_stages" : 3 ,
64+ "num_warps" : 2
65+ },
66+ "8" : {
67+ "BLOCK_K" : 128 ,
68+ "BLOCK_M" : 8 ,
69+ "BLOCK_N" : 64 ,
70+ "GROUP_M" : 8 ,
71+ "num_stages" : 5 ,
72+ "num_warps" : 4
73+ }
74+ }
Original file line number Diff line number Diff line change 1+ {
2+ "1" : {
3+ "BLOCK_K" : 128 ,
4+ "BLOCK_M" : 8 ,
5+ "BLOCK_N" : 64 ,
6+ "GROUP_M" : 8 ,
7+ "num_stages" : 4 ,
8+ "num_warps" : 4
9+ },
10+ "100" : {
11+ "BLOCK_K" : 128 ,
12+ "BLOCK_M" : 64 ,
13+ "BLOCK_N" : 128 ,
14+ "GROUP_M" : 8 ,
15+ "num_stages" : 3 ,
16+ "num_warps" : 8
17+ },
18+ "1024" : {
19+ "BLOCK_K" : 128 ,
20+ "BLOCK_M" : 64 ,
21+ "BLOCK_N" : 64 ,
22+ "GROUP_M" : 8 ,
23+ "num_stages" : 3 ,
24+ "num_warps" : 4
25+ },
26+ "128" : {
27+ "BLOCK_K" : 128 ,
28+ "BLOCK_M" : 32 ,
29+ "BLOCK_N" : 64 ,
30+ "GROUP_M" : 8 ,
31+ "num_stages" : 3 ,
32+ "num_warps" : 8
33+ },
34+ "16" : {
35+ "BLOCK_K" : 128 ,
36+ "BLOCK_M" : 16 ,
37+ "BLOCK_N" : 64 ,
38+ "GROUP_M" : 8 ,
39+ "num_stages" : 4 ,
40+ "num_warps" : 4
41+ },
42+ "256" : {
43+ "BLOCK_K" : 128 ,
44+ "BLOCK_M" : 64 ,
45+ "BLOCK_N" : 64 ,
46+ "GROUP_M" : 8 ,
47+ "num_stages" : 3 ,
48+ "num_warps" : 4
49+ },
50+ "32" : {
51+ "BLOCK_K" : 128 ,
52+ "BLOCK_M" : 32 ,
53+ "BLOCK_N" : 64 ,
54+ "GROUP_M" : 8 ,
55+ "num_stages" : 3 ,
56+ "num_warps" : 8
57+ },
58+ "64" : {
59+ "BLOCK_K" : 128 ,
60+ "BLOCK_M" : 64 ,
61+ "BLOCK_N" : 64 ,
62+ "GROUP_M" : 8 ,
63+ "num_stages" : 3 ,
64+ "num_warps" : 4
65+ },
66+ "8" : {
67+ "BLOCK_K" : 128 ,
68+ "BLOCK_M" : 8 ,
69+ "BLOCK_N" : 64 ,
70+ "GROUP_M" : 8 ,
71+ "num_stages" : 4 ,
72+ "num_warps" : 4
73+ }
74+ }
Original file line number Diff line number Diff line change 1+ {
2+ "1" : {
3+ "BLOCK_K" : 128 ,
4+ "BLOCK_M" : 8 ,
5+ "BLOCK_N" : 64 ,
6+ "GROUP_M" : 8 ,
7+ "num_stages" : 4 ,
8+ "num_warps" : 4
9+ },
10+ "100" : {
11+ "BLOCK_K" : 128 ,
12+ "BLOCK_M" : 64 ,
13+ "BLOCK_N" : 128 ,
14+ "GROUP_M" : 8 ,
15+ "num_stages" : 3 ,
16+ "num_warps" : 8
17+ },
18+ "1024" : {
19+ "BLOCK_K" : 128 ,
20+ "BLOCK_M" : 64 ,
21+ "BLOCK_N" : 64 ,
22+ "GROUP_M" : 8 ,
23+ "num_stages" : 3 ,
24+ "num_warps" : 4
25+ },
26+ "128" : {
27+ "BLOCK_K" : 128 ,
28+ "BLOCK_M" : 32 ,
29+ "BLOCK_N" : 64 ,
30+ "GROUP_M" : 8 ,
31+ "num_stages" : 3 ,
32+ "num_warps" : 8
33+ },
34+ "16" : {
35+ "BLOCK_K" : 128 ,
36+ "BLOCK_M" : 16 ,
37+ "BLOCK_N" : 64 ,
38+ "GROUP_M" : 8 ,
39+ "num_stages" : 5 ,
40+ "num_warps" : 4
41+ },
42+ "256" : {
43+ "BLOCK_K" : 128 ,
44+ "BLOCK_M" : 64 ,
45+ "BLOCK_N" : 64 ,
46+ "GROUP_M" : 8 ,
47+ "num_stages" : 3 ,
48+ "num_warps" : 4
49+ },
50+ "32" : {
51+ "BLOCK_K" : 128 ,
52+ "BLOCK_M" : 32 ,
53+ "BLOCK_N" : 64 ,
54+ "GROUP_M" : 8 ,
55+ "num_stages" : 3 ,
56+ "num_warps" : 8
57+ },
58+ "64" : {
59+ "BLOCK_K" : 128 ,
60+ "BLOCK_M" : 64 ,
61+ "BLOCK_N" : 64 ,
62+ "GROUP_M" : 8 ,
63+ "num_stages" : 3 ,
64+ "num_warps" : 4
65+ },
66+ "8" : {
67+ "BLOCK_K" : 128 ,
68+ "BLOCK_M" : 8 ,
69+ "BLOCK_N" : 64 ,
70+ "GROUP_M" : 8 ,
71+ "num_stages" : 3 ,
72+ "num_warps" : 4
73+ }
74+ }
Original file line number Diff line number Diff line change 1+ {
2+ "1" : {
3+ "BLOCK_K" : 128 ,
4+ "BLOCK_M" : 8 ,
5+ "BLOCK_N" : 64 ,
6+ "GROUP_M" : 8 ,
7+ "num_stages" : 4 ,
8+ "num_warps" : 4
9+ },
10+ "100" : {
11+ "BLOCK_K" : 128 ,
12+ "BLOCK_M" : 16 ,
13+ "BLOCK_N" : 128 ,
14+ "GROUP_M" : 8 ,
15+ "num_stages" : 3 ,
16+ "num_warps" : 8
17+ },
18+ "1024" : {
19+ "BLOCK_K" : 128 ,
20+ "BLOCK_M" : 64 ,
21+ "BLOCK_N" : 64 ,
22+ "GROUP_M" : 8 ,
23+ "num_stages" : 3 ,
24+ "num_warps" : 4
25+ },
26+ "128" : {
27+ "BLOCK_K" : 128 ,
28+ "BLOCK_M" : 64 ,
29+ "BLOCK_N" : 64 ,
30+ "GROUP_M" : 8 ,
31+ "num_stages" : 5 ,
32+ "num_warps" : 4
33+ },
34+ "16" : {
35+ "BLOCK_K" : 128 ,
36+ "BLOCK_M" : 8 ,
37+ "BLOCK_N" : 64 ,
38+ "GROUP_M" : 8 ,
39+ "num_stages" : 4 ,
40+ "num_warps" : 4
41+ },
42+ "256" : {
43+ "BLOCK_K" : 128 ,
44+ "BLOCK_M" : 32 ,
45+ "BLOCK_N" : 64 ,
46+ "GROUP_M" : 8 ,
47+ "num_stages" : 3 ,
48+ "num_warps" : 4
49+ },
50+ "32" : {
51+ "BLOCK_K" : 128 ,
52+ "BLOCK_M" : 16 ,
53+ "BLOCK_N" : 64 ,
54+ "GROUP_M" : 8 ,
55+ "num_stages" : 3 ,
56+ "num_warps" : 4
57+ },
58+ "64" : {
59+ "BLOCK_K" : 128 ,
60+ "BLOCK_M" : 32 ,
61+ "BLOCK_N" : 64 ,
62+ "GROUP_M" : 8 ,
63+ "num_stages" : 4 ,
64+ "num_warps" : 8
65+ },
66+ "8" : {
67+ "BLOCK_K" : 128 ,
68+ "BLOCK_M" : 8 ,
69+ "BLOCK_N" : 64 ,
70+ "GROUP_M" : 8 ,
71+ "num_stages" : 5 ,
72+ "num_warps" : 4
73+ }
74+ }
You can’t perform that action at this time.
0 commit comments