Skip to content

Commit 5fabf4d

Browse files
committed
Merge upstream/main into metal-gemm
2 parents cc9504c + c2a9d98 commit 5fabf4d

98 files changed

Lines changed: 40630 additions & 171 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/ci.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,19 +44,19 @@ jobs:
4444
fetch-depth: 0
4545
submodules: recursive
4646

47-
- name: Setup Python 3.9
47+
- name: Setup Python 3.10
4848
id: setup-pylowest
4949
uses: actions/setup-python@v6
5050
with:
51-
python-version: "3.9"
51+
python-version: "3.10"
5252
update-environment: true
5353
cache: pip
5454
cache-dependency-path: |
5555
pyproject.toml
5656
requirements*.txt
5757
.pre-commit-config.yaml
5858
59-
- name: Check AST with Python 3.9
59+
- name: Check AST with Python 3.10
6060
run: |
6161
"${{ steps.setup-pylowest.outputs.python-path }}" -m compileall -q -f tilelang
6262

.github/workflows/dist.yml

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@ on:
1818
- CMakeLists.txt
1919
- version_provider.py
2020
- .github/workflows/dist.yml
21-
# temporarily add to dist check
22-
# until we have type checking in ci / move to python 3.10
21+
# Type aliases can affect package import/build behavior.
2322
- tilelang/_typing.py
2423
release:
2524
types:
@@ -115,12 +114,11 @@ jobs:
115114
strategy:
116115
matrix:
117116
target:
118-
# Build wheels for different Python ABIs.
119-
# Windows CUDA 13.0 uses cp310 because PyTorch cu130 does not publish cp39 wheels.
120-
- { runner: ubuntu-latest, toolkit: "CUDA-12.8", test_backends: "cu118 cu130", python_version: "3.9" }
121-
- { runner: ubuntu-24.04-arm, toolkit: "CUDA-12.8", test_backends: "cu126 cu130", python_version: "3.9" }
117+
# Build wheels for the minimum supported Python ABI.
118+
- { runner: ubuntu-latest, toolkit: "CUDA-12.8", test_backends: "cu118 cu130", python_version: "3.10" }
119+
- { runner: ubuntu-24.04-arm, toolkit: "CUDA-12.8", test_backends: "cu126 cu130", python_version: "3.10" }
122120
- { runner: windows-latest, toolkit: "CUDA-13.0", test_backends: "cu130", python_version: "3.10" }
123-
- { runner: macos-latest, toolkit: "Metal", python_version: "3.9" }
121+
- { runner: macos-latest, toolkit: "Metal", python_version: "3.10" }
124122
# - "3.14t" # let user to build from source for now
125123
# TODO: Add cp315-abi3.abi3t after PEP 803
126124
fail-fast: false
Lines changed: 311 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,311 @@
1+
/*
2+
Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
3+
4+
Permission is hereby granted, free of charge, to any person obtaining a copy
5+
of this software and associated documentation files (the "Software"), to deal
6+
in the Software without restriction, including without limitation the rights
7+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8+
copies of the Software, and to permit persons to whom the Software is
9+
furnished to do so, subject to the following conditions:
10+
11+
The above copyright notice and this permission notice shall be included in
12+
all copies or substantial portions of the Software.
13+
14+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20+
THE SOFTWARE.
21+
*/
22+
23+
#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_CHANNEL_DESCRIPTOR_H
24+
#define HIP_INCLUDE_HIP_AMD_DETAIL_CHANNEL_DESCRIPTOR_H
25+
26+
#if !defined(__HIPCC_RTC__)
27+
#include <hip/hip_common.h>
28+
#include <hip/driver_types.h>
29+
#include <hip/amd_detail/amd_hip_vector_types.h>
30+
#endif
31+
32+
#ifdef __cplusplus
33+
34+
extern "C" HIP_PUBLIC_API hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w,
35+
hipChannelFormatKind f);
36+
37+
static inline hipChannelFormatDesc hipCreateChannelDescHalf() {
38+
int e = (int)sizeof(unsigned short) * 8;
39+
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat);
40+
}
41+
42+
static inline hipChannelFormatDesc hipCreateChannelDescHalf1() {
43+
int e = (int)sizeof(unsigned short) * 8;
44+
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat);
45+
}
46+
47+
static inline hipChannelFormatDesc hipCreateChannelDescHalf2() {
48+
int e = (int)sizeof(unsigned short) * 8;
49+
return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindFloat);
50+
}
51+
52+
static inline hipChannelFormatDesc hipCreateChannelDescHalf4() {
53+
int e = (int)sizeof(unsigned short) * 8;
54+
return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindFloat);
55+
}
56+
57+
template <typename T> static inline hipChannelFormatDesc hipCreateChannelDesc() {
58+
return hipCreateChannelDesc(0, 0, 0, 0, hipChannelFormatKindNone);
59+
}
60+
61+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<char>() {
62+
int e = (int)sizeof(char) * 8;
63+
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
64+
}
65+
66+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<signed char>() {
67+
int e = (int)sizeof(signed char) * 8;
68+
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
69+
}
70+
71+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<unsigned char>() {
72+
int e = (int)sizeof(unsigned char) * 8;
73+
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
74+
}
75+
76+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<uchar1>() {
77+
int e = (int)sizeof(unsigned char) * 8;
78+
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
79+
}
80+
81+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<char1>() {
82+
int e = (int)sizeof(signed char) * 8;
83+
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
84+
}
85+
86+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<uchar2>() {
87+
int e = (int)sizeof(unsigned char) * 8;
88+
return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindUnsigned);
89+
}
90+
91+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<char2>() {
92+
int e = (int)sizeof(signed char) * 8;
93+
return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned);
94+
}
95+
96+
#ifndef __GNUC__ // vector3 is the same as vector4
97+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<uchar3>() {
98+
int e = (int)sizeof(unsigned char) * 8;
99+
return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned);
100+
}
101+
102+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<char3>() {
103+
int e = (int)sizeof(signed char) * 8;
104+
return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned);
105+
}
106+
#endif
107+
108+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<uchar4>() {
109+
int e = (int)sizeof(unsigned char) * 8;
110+
return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned);
111+
}
112+
113+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<char4>() {
114+
int e = (int)sizeof(signed char) * 8;
115+
return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned);
116+
}
117+
118+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<unsigned short>() {
119+
int e = (int)sizeof(unsigned short) * 8;
120+
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
121+
}
122+
123+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<signed short>() {
124+
int e = (int)sizeof(signed short) * 8;
125+
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
126+
}
127+
128+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<ushort1>() {
129+
int e = (int)sizeof(unsigned short) * 8;
130+
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
131+
}
132+
133+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<short1>() {
134+
int e = (int)sizeof(signed short) * 8;
135+
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
136+
}
137+
138+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<ushort2>() {
139+
int e = (int)sizeof(unsigned short) * 8;
140+
return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindUnsigned);
141+
}
142+
143+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<short2>() {
144+
int e = (int)sizeof(signed short) * 8;
145+
return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned);
146+
}
147+
148+
#ifndef __GNUC__
149+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<ushort3>() {
150+
int e = (int)sizeof(unsigned short) * 8;
151+
return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned);
152+
}
153+
154+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<short3>() {
155+
int e = (int)sizeof(signed short) * 8;
156+
return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned);
157+
}
158+
#endif
159+
160+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<ushort4>() {
161+
int e = (int)sizeof(unsigned short) * 8;
162+
return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned);
163+
}
164+
165+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<short4>() {
166+
int e = (int)sizeof(signed short) * 8;
167+
return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned);
168+
}
169+
170+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<unsigned int>() {
171+
int e = (int)sizeof(unsigned int) * 8;
172+
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
173+
}
174+
175+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<signed int>() {
176+
int e = (int)sizeof(signed int) * 8;
177+
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
178+
}
179+
180+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<uint1>() {
181+
int e = (int)sizeof(unsigned int) * 8;
182+
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
183+
}
184+
185+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<int1>() {
186+
int e = (int)sizeof(signed int) * 8;
187+
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
188+
}
189+
190+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<uint2>() {
191+
int e = (int)sizeof(unsigned int) * 8;
192+
return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindUnsigned);
193+
}
194+
195+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<int2>() {
196+
int e = (int)sizeof(signed int) * 8;
197+
return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned);
198+
}
199+
200+
#ifndef __GNUC__
201+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<uint3>() {
202+
int e = (int)sizeof(unsigned int) * 8;
203+
return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned);
204+
}
205+
206+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<int3>() {
207+
int e = (int)sizeof(signed int) * 8;
208+
return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned);
209+
}
210+
#endif
211+
212+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<uint4>() {
213+
int e = (int)sizeof(unsigned int) * 8;
214+
return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned);
215+
}
216+
217+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<int4>() {
218+
int e = (int)sizeof(signed int) * 8;
219+
return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned);
220+
}
221+
222+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<float>() {
223+
int e = (int)sizeof(float) * 8;
224+
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat);
225+
}
226+
227+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<float1>() {
228+
int e = (int)sizeof(float) * 8;
229+
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat);
230+
}
231+
232+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<float2>() {
233+
int e = (int)sizeof(float) * 8;
234+
return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindFloat);
235+
}
236+
237+
#ifndef __GNUC__
238+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<float3>() {
239+
int e = (int)sizeof(float) * 8;
240+
return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindFloat);
241+
}
242+
#endif
243+
244+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<float4>() {
245+
int e = (int)sizeof(float) * 8;
246+
return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindFloat);
247+
}
248+
249+
#if !defined(__LP64__)
250+
251+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<unsigned long>() {
252+
int e = (int)sizeof(unsigned long) * 8;
253+
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
254+
}
255+
256+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<signed long>() {
257+
int e = (int)sizeof(signed long) * 8;
258+
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
259+
}
260+
261+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<ulong1>() {
262+
int e = (int)sizeof(unsigned long) * 8;
263+
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
264+
}
265+
266+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<long1>() {
267+
int e = (int)sizeof(signed long) * 8;
268+
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
269+
}
270+
271+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<ulong2>() {
272+
int e = (int)sizeof(unsigned long) * 8;
273+
return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindUnsigned);
274+
}
275+
276+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<long2>() {
277+
int e = (int)sizeof(signed long) * 8;
278+
return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned);
279+
}
280+
281+
#ifndef __GNUC__
282+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<ulong3>() {
283+
int e = (int)sizeof(unsigned long) * 8;
284+
return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned);
285+
}
286+
287+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<long3>() {
288+
int e = (int)sizeof(signed long) * 8;
289+
return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned);
290+
}
291+
#endif
292+
293+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<ulong4>() {
294+
int e = (int)sizeof(unsigned long) * 8;
295+
return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned);
296+
}
297+
298+
template <> inline hipChannelFormatDesc hipCreateChannelDesc<long4>() {
299+
int e = (int)sizeof(signed long) * 8;
300+
return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned);
301+
}
302+
#endif /* !__LP64__ */
303+
304+
#else
305+
306+
struct hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w,
307+
enum hipChannelFormatKind f);
308+
309+
#endif /* __cplusplus */
310+
311+
#endif /* !HIP_INCLUDE_HIP_AMD_DETAIL_CHANNEL_DESCRIPTOR_H */

0 commit comments

Comments
 (0)