|
| 1 | +/* |
| 2 | + * Copyright (c) Meta Platforms, Inc. and affiliates. |
| 3 | + * All rights reserved. |
| 4 | + * |
| 5 | + * This source code is licensed under the BSD-style license found in the |
| 6 | + * LICENSE file in the root directory of this source tree. |
| 7 | + */ |
| 8 | + |
#include <executorch/backends/vulkan/runtime/api/containers/StagingBuffer.h>

#include <cmath>
#include <cstdint>
#include <cstring>
| 10 | + |
| 11 | +namespace vkcompute { |
| 12 | +namespace api { |
| 13 | + |
namespace {

//
// The following fp16<->fp32 conversion functions are adapted from:
//   executorch/runtime/core/portable_type/c10/torch/headeronly/util/Half.h
//   (fp16_ieee_to_fp32_value and fp16_ieee_from_fp32_value)
//

/*
 * Reinterpret a 32-bit pattern as a single-precision float. The copy goes
 * through std::memcpy so the type pun is well-defined.
 */
inline float fp32_from_bits(const uint32_t bits) {
  float value;
  std::memcpy(&value, &bits, sizeof(value));
  return value;
}

/*
 * Return the 32-bit pattern that represents the single-precision float `f`.
 */
inline uint32_t fp32_to_bits(const float f) {
  uint32_t pattern;
  std::memcpy(&pattern, &f, sizeof(pattern));
  return pattern;
}

/*
 * Convert a 16-bit floating-point number in IEEE half-precision format, in bit
 * representation, to a 32-bit floating-point number in IEEE single-precision
 * format.
 *
 * Two candidate results are computed without branching on the input class
 * (one valid for normalized/Inf/NaN inputs, one valid for zero/denormalized
 * inputs) and the right one is selected at the end.
 */
float half_to_float(uint16_t h) {
  /*
   * Move the half-precision bits to the upper half of a 32-bit word:
   *      +---+-----+------------+-------------------+
   *      | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000|
   *      +---+-----+------------+-------------------+
   * Bits  31  26-30    16-25            0-15
   */
  const uint32_t w = static_cast<uint32_t>(h) << 16;
  // Sign bit, already in its final single-precision position (bit 31).
  const uint32_t sign = w & UINT32_C(0x80000000);
  // Doubling shifts the sign bit out, leaving exponent and mantissa in the
  // top 15 bits of the word.
  const uint32_t two_w = w + w;

  /*
   * Normalized / Inf / NaN path.
   *
   * `two_w >> 4` places the 5-bit exponent in bits 23-27 and the 10-bit
   * mantissa in bits 13-22, i.e. in the single-precision exponent and
   * mantissa fields. The exponent still has to be corrected by the bias
   * difference between the formats (0x7F - 0xF = 0x70 = 112). Instead of
   * adding 0x70 directly, 0xE0 (= 112 + 112) is added and the value is then
   * multiplied by 2^(-112): an all-ones half exponent (31) becomes 255, so
   * Inf/NaN inputs map to single-precision Inf/NaN and survive the scaling,
   * while every other exponent ends up with the net +112 adjustment.
   */
  constexpr uint32_t exp_offset = UINT32_C(0xE0) << 23;
  // Biased exponent 15 with a zero mantissa encodes 2^(15 - 127) = 2^(-112).
  const float exp_scale = fp32_from_bits(UINT32_C(15) << 23);
  const float normalized_value =
      fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;

  /*
   * Zero / denormalized path.
   *
   * The half mantissa is OR-ed into the low mantissa bits of the float 0.5
   * (biased exponent 126), and 0.5 is subtracted again. The subtraction
   * leaves mantissa * 2^(-24) and renormalizes it as a side effect. A zero
   * input yields exactly 0.
   */
  constexpr uint32_t magic_mask = UINT32_C(126) << 23;
  constexpr float magic_bias = 0.5f;
  const float denormalized_value =
      fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;

  /*
   * An input whose exponent field is zero satisfies two_w < 2^27; pick the
   * matching candidate and put the sign back.
   */
  constexpr uint32_t denormalized_cutoff = UINT32_C(1) << 27;
  const uint32_t magnitude = two_w < denormalized_cutoff
      ? fp32_to_bits(denormalized_value)
      : fp32_to_bits(normalized_value);
  return fp32_from_bits(sign | magnitude);
}

/*
 * Convert a 32-bit floating-point number in IEEE single-precision format to a
 * 16-bit floating-point number in IEEE half-precision format, in bit
 * representation.
 *
 * Any NaN input produces the quiet NaN pattern 0x7E00 combined with the
 * input's sign bit.
 */
uint16_t float_to_half(float f) {
  // 2^(239 - 127) = 2^112 and 2^(17 - 127) = 2^(-110), built from their bit
  // patterns. The two multiplications are kept separate and in this order:
  // magnitudes too large for half precision overflow to +Inf in the first
  // one and stay Inf through the second.
  const float scale_to_inf = fp32_from_bits(UINT32_C(239) << 23);
  const float scale_to_zero = fp32_from_bits(UINT32_C(17) << 23);

  float base = (std::fabs(f) * scale_to_inf) * scale_to_zero;

  const uint32_t w = fp32_to_bits(f);
  // Doubling drops the sign bit; the exponent then occupies the top byte.
  const uint32_t shl1_w = w + w;
  const uint32_t sign = w & UINT32_C(0x80000000);
  // Exponent field of the input, clamped from below to 0x71.
  uint32_t bias = shl1_w & UINT32_C(0xFF000000);
  if (bias < UINT32_C(0x71000000)) {
    bias = UINT32_C(0x71000000);
  }

  // Adding a float built from the (clamped) input exponent makes the
  // floating-point addition itself round `base` at half-precision
  // granularity; the half exponent and mantissa fields are then read
  // straight out of the sum's bit pattern.
  base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base;
  const uint32_t bits = fp32_to_bits(base);
  const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00);
  const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF);
  // Addition rather than OR: a mantissa carry must propagate into the
  // exponent field.
  const uint32_t nonsign = exp_bits + mantissa_bits;
  // shl1_w > 0xFF000000 means all-ones exponent with non-zero mantissa: NaN.
  return static_cast<uint16_t>(
      (sign >> 16) |
      (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign));
}

} // namespace
| 131 | + |
| 132 | +StagingBuffer::StagingBuffer( |
| 133 | + Context* context_p, |
| 134 | + const vkapi::ScalarType dtype, |
| 135 | + const size_t numel, |
| 136 | + const vkapi::CopyDirection direction) |
| 137 | + : context_p_(context_p), |
| 138 | + dtype_(get_staging_dtype(context_p, dtype)), |
| 139 | + vulkan_buffer_(context_p_->adapter_ptr()->vma().create_staging_buffer( |
| 140 | + element_size(dtype_) * numel, |
| 141 | + direction)), |
| 142 | + mapped_data_(nullptr) {} |
| 143 | + |
| 144 | +vkapi::ScalarType get_staging_dtype( |
| 145 | + Context* context_p, |
| 146 | + vkapi::ScalarType dtype) { |
| 147 | + if (dtype == vkapi::kHalf && |
| 148 | + !context_p->adapter_ptr()->has_full_float16_buffers_support()) { |
| 149 | + return vkapi::kFloat; |
| 150 | + } |
| 151 | + return dtype; |
| 152 | +} |
| 153 | + |
| 154 | +void StagingBuffer::cast_half_to_float_and_copy_from( |
| 155 | + const uint16_t* src, |
| 156 | + const size_t numel) { |
| 157 | + VK_CHECK_COND(numel <= this->numel()); |
| 158 | + float* dst = reinterpret_cast<float*>(data()); |
| 159 | + for (size_t i = 0; i < numel; ++i) { |
| 160 | + dst[i] = half_to_float(src[i]); |
| 161 | + } |
| 162 | +} |
| 163 | + |
| 164 | +void StagingBuffer::cast_float_to_half_and_copy_to( |
| 165 | + uint16_t* dst, |
| 166 | + const size_t numel) { |
| 167 | + VK_CHECK_COND(numel <= this->numel()); |
| 168 | + vmaInvalidateAllocation( |
| 169 | + vulkan_buffer_.vma_allocator(), |
| 170 | + vulkan_buffer_.allocation(), |
| 171 | + 0u, |
| 172 | + VK_WHOLE_SIZE); |
| 173 | + const float* src = reinterpret_cast<const float*>(data()); |
| 174 | + for (size_t i = 0; i < numel; ++i) { |
| 175 | + dst[i] = float_to_half(src[i]); |
| 176 | + } |
| 177 | +} |
| 178 | + |
| 179 | +} // namespace api |
| 180 | +} // namespace vkcompute |
0 commit comments