|
2 | 2 |
|
3 | 3 | #pragma once |
4 | 4 |
|
| 5 | +#include <cute/numeric/numeric_types.hpp> |
5 | 6 | #include <cute/tensor.hpp> |
6 | 7 | #include <cutlass/numeric_conversion.h> |
7 | 8 |
|
| 9 | +namespace cutlass { |
| 10 | + |
| 11 | +using uint3b_t = integer_subbyte<3, false>; |
| 12 | +using uint5b_t = integer_subbyte<5, false>; |
| 13 | + |
| 14 | +template <typename T, int N, FloatRoundStyle Round> |
| 15 | +struct NumericArrayConverter<T, uint3b_t, N, Round> { |
| 16 | + static_assert(N % 8 == 0); |
| 17 | + |
| 18 | + using result_type = Array<T, N>; |
| 19 | + using source_type = Array<uint3b_t, N>; |
| 20 | + |
| 21 | + CUTLASS_HOST_DEVICE |
| 22 | + static result_type convert(const source_type& source) { |
| 23 | + result_type result; |
| 24 | + auto* s_base = reinterpret_cast<const uint8_t*>(&source); |
| 25 | + CUTLASS_PRAGMA_UNROLL |
| 26 | + for (int i = 0; i < N / 8; ++i) { |
| 27 | + auto* s = s_base + i * 3; |
| 28 | + result[i * 8] = T(s[0] & 0x07); |
| 29 | + result[i * 8 + 1] = T((s[0] & 0x38) >> 3); |
| 30 | + result[i * 8 + 2] = T((s[0] & 0xc0) >> 6) + T((s[1] & 0x01) << 2); |
| 31 | + result[i * 8 + 3] = T((s[1] & 0x0e) >> 1); |
| 32 | + result[i * 8 + 4] = T((s[1] & 0x70) >> 4); |
| 33 | + result[i * 8 + 5] = T((s[1] & 0x80) >> 7) + T((s[2] & 0x03) << 1); |
| 34 | + result[i * 8 + 6] = T((s[2] & 0x1c) >> 2); |
| 35 | + result[i * 8 + 7] = T((s[2] & 0xe0) >> 5); |
| 36 | + } |
| 37 | + return result; |
| 38 | + } |
| 39 | + |
| 40 | + CUTLASS_HOST_DEVICE |
| 41 | + result_type operator()(const source_type& s) const { |
| 42 | + return convert(s); |
| 43 | + } |
| 44 | +}; |
| 45 | + |
| 46 | +template <typename T, int N, FloatRoundStyle Round> |
| 47 | +struct NumericArrayConverter<T, uint5b_t, N, Round> { |
| 48 | + static_assert(N % 8 == 0); |
| 49 | + |
| 50 | + using result_type = Array<T, N>; |
| 51 | + using source_type = Array<uint5b_t, N>; |
| 52 | + |
| 53 | + CUTLASS_HOST_DEVICE |
| 54 | + static result_type convert(const source_type& source) { |
| 55 | + result_type result; |
| 56 | + auto* s_base = reinterpret_cast<const uint8_t*>(&source); |
| 57 | + CUTLASS_PRAGMA_UNROLL |
| 58 | + for (int i = 0; i < N / 8; ++i) { |
| 59 | + auto* s = s_base + i * 5; |
| 60 | + result[i * 8] = T(s[0] & 0x1f); |
| 61 | + result[i * 8 + 1] = T((s[0] & 0xe0) >> 5) + T((s[1] & 0x03) << 3); |
| 62 | + result[i * 8 + 2] = T((s[1] & 0x7c) >> 2); |
| 63 | + result[i * 8 + 3] = T((s[1] & 0x80) >> 7) + T((s[2] & 0x0f) << 1); |
| 64 | + result[i * 8 + 4] = T((s[2] & 0xf0) >> 4) + T((s[3] & 0x01) << 4); |
| 65 | + result[i * 8 + 5] = T((s[3] & 0x3e) >> 1); |
| 66 | + result[i * 8 + 6] = T((s[3] & 0xc0) >> 6) + T((s[4] & 0x07) << 2); |
| 67 | + result[i * 8 + 7] = T((s[4] & 0xf8) >> 3); |
| 68 | + } |
| 69 | + return result; |
| 70 | + } |
| 71 | + |
| 72 | + CUTLASS_HOST_DEVICE |
| 73 | + result_type operator()(const source_type& s) const { |
| 74 | + return convert(s); |
| 75 | + } |
| 76 | +}; |
| 77 | + |
| 78 | +template <typename T, int N, FloatRoundStyle Round> |
| 79 | +struct NumericArrayConverter<T, uint6b_t, N, Round> { |
| 80 | + static_assert(N % 4 == 0); |
| 81 | + |
| 82 | + using result_type = Array<T, N>; |
| 83 | + using source_type = Array<uint6b_t, N>; |
| 84 | + |
| 85 | + CUTLASS_HOST_DEVICE |
| 86 | + static result_type convert(const source_type& source) { |
| 87 | + result_type result; |
| 88 | + auto* s_base = reinterpret_cast<const uint8_t*>(&source); |
| 89 | + CUTLASS_PRAGMA_UNROLL |
| 90 | + for (int i = 0; i < N / 4; ++i) { |
| 91 | + auto* s = s_base + i * 3; |
| 92 | + result[i * 4] = T(s[0] & 0x3f); |
| 93 | + result[i * 4 + 1] = T((s[0] >> 6) & 0x03) + T((s[1] & 0x0f) << 2); |
| 94 | + result[i * 4 + 2] = T((s[1] >> 4) & 0x0f) + T((s[2] & 0x03) << 4); |
| 95 | + result[i * 4 + 3] = T((s[2] >> 2) & 0x3f); |
| 96 | + } |
| 97 | + return result; |
| 98 | + } |
| 99 | + |
| 100 | + CUTLASS_HOST_DEVICE |
| 101 | + result_type operator()(const source_type& s) const { |
| 102 | + return convert(s); |
| 103 | + } |
| 104 | +}; |
| 105 | + |
| 106 | +} // namespace cutlass |
| 107 | + |
| 108 | +namespace cute { |
| 109 | + |
// Plain byte-array storage types that are 24, 40 and 48 bits wide.
// Required by tiled copy for 3/5/6-bit weights.
struct uint24_t {
  std::array<std::uint8_t, 24 / 8> bytes;  // 3 bytes
};
struct uint40_t {
  std::array<std::uint8_t, 40 / 8> bytes;  // 5 bytes
};
struct uint48_t {
  std::array<std::uint8_t, 48 / 8> bytes;  // 6 bytes
};
| 120 | + |
| 121 | +template <> |
| 122 | +struct uint_bit<24> { |
| 123 | + using type = uint24_t; |
| 124 | +}; |
| 125 | +template <> |
| 126 | +struct uint_bit<40> { |
| 127 | + using type = uint40_t; |
| 128 | +}; |
| 129 | +template <> |
| 130 | +struct uint_bit<48> { |
| 131 | + using type = uint48_t; |
| 132 | +}; |
| 133 | + |
| 134 | +} // namespace cute |
| 135 | + |
8 | 136 | namespace cutlass_gemm { |
9 | 137 |
|
10 | 138 | // Whether the quant type is affine quantization. |
|
0 commit comments