psiddh
diff --git a/‎backends/vulkan/runtime/api/containers/StagingBuffer.cpp‎
Lines changed: 180 additions & 0 deletions b/‎backends/vulkan/runtime/api/containers/StagingBuffer.cpp‎
Lines changed: 180 additions & 0 deletions
diff --git a/‎backends/vulkan/runtime/api/containers/StagingBuffer.h‎
Lines changed: 11 additions & 7 deletions b/‎backends/vulkan/runtime/api/containers/StagingBuffer.h‎
Lines changed: 11 additions & 7 deletions
diff --git a/‎backends/vulkan/runtime/api/containers/Tensor.cpp‎
Lines changed: 12 additions & 2 deletions b/‎backends/vulkan/runtime/api/containers/Tensor.cpp‎
Lines changed: 12 additions & 2 deletions
diff --git a/‎backends/vulkan/runtime/graph/ComputeGraph.cpp‎
Lines changed: 15 additions & 0 deletions b/‎backends/vulkan/runtime/graph/ComputeGraph.cpp‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎backends/vulkan/runtime/graph/ComputeGraph.h‎
Lines changed: 10 additions & 2 deletions b/‎backends/vulkan/runtime/graph/ComputeGraph.h‎
Lines changed: 10 additions & 2 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/PrepackNode.cpp‎
Lines changed: 30 additions & 1 deletion b/‎backends/vulkan/runtime/graph/ops/PrepackNode.cpp‎
Lines changed: 30 additions & 1 deletion
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/vulkan/runtime/api/containers/StagingBuffer.h>
+
+namespace vkcompute {
+namespace api {
+
+namespace {
+
+//
+// The following fp16<->fp32 conversion functions are adapted from:
+// executorch/runtime/core/portable_type/c10/torch/headeronly/util/Half.h
+// (fp16_ieee_to_fp32_value and fp16_ieee_from_fp32_value)
+//
+
+inline float fp32_from_bits(uint32_t bits) { // bit pattern -> float value
+  float result;
+  std::memcpy(&result, &bits, sizeof(result)); // copies bytes; no conversion
+  return result;
+}
+
+inline uint32_t fp32_to_bits(float f) { // float value -> its bit pattern
+  uint32_t bits;
+  std::memcpy(&bits, &f, sizeof(bits)); // copies bytes; no conversion
+  return bits;
+}
+
+/*
+ * Convert a 16-bit floating-point number in IEEE half-precision format, in bit
+ * representation, to a 32-bit floating-point number in IEEE single-precision
+ * format.
+ *
+ * Two candidate results are always computed: one that is valid when the input
+ * has a non-zero exponent field (normalized numbers, Inf and NaN) and one that
+ * is valid when the exponent field is zero (denormalized numbers and zero).
+ * The applicable candidate is then selected and combined with the sign bit.
+ */
+float half_to_float(uint16_t h) {
+  /*
+   * Extend the half-precision floating-point number to 32 bits and shift to the
+   * upper part of the 32-bit word:
+   *      +---+-----+------------+-------------------+
+   *      | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000|
+   *      +---+-----+------------+-------------------+
+   * Bits  31  26-30    16-25            0-15
+   */
+  const uint32_t w = (uint32_t)h << 16;
+  /*
+   * Extract the sign of the input number into the high bit of the 32-bit word:
+   */
+  const uint32_t sign = w & UINT32_C(0x80000000);
+  /*
+   * Extract mantissa and biased exponent of the input number into the high bits
+   * of the 32-bit word. Adding w to itself is a left shift by one; the unsigned
+   * addition wraps, which discards the sign bit and leaves the exponent in
+   * bits 27-31 and the mantissa in bits 17-26.
+   */
+  const uint32_t two_w = w + w;
+
+  /*
+   * Shift exponent and mantissa into bits 23-27 and bits 13-22 so they become
+   * exponent and mantissa of a single-precision floating-point number.
+   *
+   * The exponent needs to be corrected by the difference in exponent bias
+   * between single-precision and half-precision formats (0x7F - 0xF = 0x70).
+   * Adding 0x70 directly would not map the half-precision Inf/NaN exponent
+   * (0x1F) to the single-precision Inf/NaN exponent (0xFF). Instead 0xE0 is
+   * added, which does (0x1F + 0xE0 = 0xFF), and the result is then multiplied
+   * by 2^(-112) (a float whose exponent field is 15, i.e. 15 - 127 = -112).
+   * The multiplication lowers the exponent of finite values by 0x70 and leaves
+   * Inf/NaN as Inf/NaN.
+   */
+  constexpr uint32_t exp_offset = UINT32_C(0xE0) << 23;
+  constexpr uint32_t scale_bits = (uint32_t)15 << 23;
+  float exp_scale_val = 0;
+  std::memcpy(&exp_scale_val, &scale_bits, sizeof(exp_scale_val));
+  const float exp_scale = exp_scale_val;
+  const float normalized_value =
+      fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;
+
+  /*
+   * Convert denormalized half-precision inputs into single-precision results
+   * (always normalized). Zero inputs are also handled here.
+   *
+   * When the input exponent field is zero, the shift leaves only the 10
+   * mantissa bits, which are placed in the low bits of a float whose exponent
+   * field is 126. That float has the value 0.5 + mantissa * 2^(-24), so
+   * subtracting 0.5 leaves mantissa * 2^(-24), the value of a half-precision
+   * denormal.
+   */
+  constexpr uint32_t magic_mask = UINT32_C(126) << 23;
+  constexpr float magic_bias = 0.5f;
+  const float denormalized_value =
+      fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;
+
+  /*
+   * Choose either results of conversion of input as a normalized number, or
+   * as a denormalized number, depending on the input exponent. two_w is below
+   * 1 << 27 exactly when the five exponent bits (27-31) are all zero.
+   */
+  constexpr uint32_t denormalized_cutoff = UINT32_C(1) << 27;
+  const uint32_t result = sign |
+      (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value)
+                                   : fp32_to_bits(normalized_value));
+  return fp32_from_bits(result);
+}
+
+/*
+ * Convert a 32-bit floating-point number in IEEE single-precision format to a
+ * 16-bit floating-point number in IEEE half-precision format, in bit
+ * representation.
+ *
+ * The magnitude of the input is rescaled (multiplied by 2^112 and then by
+ * 2^(-110)) and added to a bias value built from the input's exponent field.
+ * The floating-point addition performs the rounding to half-precision
+ * mantissa width, so the result follows the active floating-point rounding
+ * mode. The half-precision exponent and mantissa are then read out of the
+ * bits of that sum and combined with the sign bit.
+ *
+ * Inputs whose bit pattern, ignoring the sign, is greater than that of
+ * infinity (i.e. NaN) are mapped to the bit pattern 0x7E00 plus the sign.
+ */
+uint16_t float_to_half(float f) {
+  constexpr uint32_t scale_to_inf_bits = (uint32_t)239 << 23; // 2^112
+  constexpr uint32_t scale_to_zero_bits = (uint32_t)17 << 23; // 2^(-110)
+  float scale_to_inf_val = 0, scale_to_zero_val = 0;
+  std::memcpy(&scale_to_inf_val, &scale_to_inf_bits, sizeof(scale_to_inf_val));
+  std::memcpy(
+      &scale_to_zero_val, &scale_to_zero_bits, sizeof(scale_to_zero_val));
+  const float scale_to_inf = scale_to_inf_val;
+  const float scale_to_zero = scale_to_zero_val;
+
+  float base = (fabsf(f) * scale_to_inf) * scale_to_zero;
+
+  const uint32_t w = fp32_to_bits(f);
+  const uint32_t shl1_w = w + w; // drops the sign bit; exponent in bits 24-31
+  const uint32_t sign = w & UINT32_C(0x80000000);
+  uint32_t bias = shl1_w & UINT32_C(0xFF000000);
+  if (bias < UINT32_C(0x71000000)) { // clamp exponent field to >= 0x71
+    bias = UINT32_C(0x71000000);
+  }
+
+  base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base;
+  const uint32_t bits = fp32_to_bits(base);
+  const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00);
+  const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF);
+  const uint32_t nonsign = exp_bits + mantissa_bits;
+  return static_cast<uint16_t>(
+      (sign >> 16) |
+      (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign));
+}
+
+} // namespace
+
+StagingBuffer::StagingBuffer(
+    Context* context_p,
+    const vkapi::ScalarType dtype,
+    const size_t numel,
+    const vkapi::CopyDirection direction)
+    : context_p_(context_p),
+      dtype_(get_staging_dtype(context_p, dtype)), // may differ from `dtype`
+      vulkan_buffer_(context_p_->adapter_ptr()->vma().create_staging_buffer(
+          element_size(dtype_) * numel, // bytes, using the staging dtype
+          direction)),
+      mapped_data_(nullptr) {}
+
+vkapi::ScalarType get_staging_dtype(
+    Context* context_p,
+    vkapi::ScalarType dtype) {
+  if (dtype == vkapi::kHalf &&
+      !context_p->adapter_ptr()->has_full_float16_buffers_support()) {
+    return vkapi::kFloat; // kHalf is staged as kFloat on such adapters
+  }
+  return dtype; // every other case keeps the requested dtype
+}
+
+/*
+ * Convert `numel` half-precision values, supplied as raw 16-bit patterns in
+ * `src`, to single-precision and store them at the start of this staging
+ * buffer's mapped memory. The buffer contents are written as float, so this
+ * is meant for staging buffers whose dtype is kFloat (NOTE(review): callers
+ * are expected to check this; it is not verified here).
+ *
+ * `numel` must not exceed this->numel(), which is enforced with VK_CHECK_COND.
+ */
+void StagingBuffer::cast_half_to_float_and_copy_from(
+    const uint16_t* src,
+    const size_t numel) {
+  VK_CHECK_COND(numel <= this->numel());
+  float* dst = reinterpret_cast<float*>(data());
+  for (size_t i = 0; i < numel; ++i) {
+    dst[i] = half_to_float(src[i]);
+  }
+  // Flush the host writes so they are visible to the device even when the
+  // allocation is not host-coherent; VMA ignores the call for coherent memory.
+  // This mirrors the invalidate performed before host reads in
+  // cast_float_to_half_and_copy_to().
+  vmaFlushAllocation(
+      vulkan_buffer_.vma_allocator(),
+      vulkan_buffer_.allocation(),
+      0u,
+      VK_WHOLE_SIZE);
+}
+
+void StagingBuffer::cast_float_to_half_and_copy_to(
+    uint16_t* dst,
+    const size_t numel) { // writes numel half-precision bit patterns to dst
+  VK_CHECK_COND(numel <= this->numel());
+  vmaInvalidateAllocation( // done before the mapped memory is read below
+      vulkan_buffer_.vma_allocator(),
+      vulkan_buffer_.allocation(),
+      0u,
+      VK_WHOLE_SIZE);
+  const float* src = reinterpret_cast<const float*>(data()); // read as fp32
+  for (size_t i = 0; i < numel; ++i) {
+    dst[i] = float_to_half(src[i]); // narrow each element to fp16 bits
+  }
+}
+
+} // namespace api
+} // namespace vkcompute
@@ -19,6 +19,10 @@
 namespace vkcompute {
 namespace api {
 
+vkapi::ScalarType get_staging_dtype(
+    Context* context_p,
+    vkapi::ScalarType dtype);
+
 class StagingBuffer final {
  private:
   Context* context_p_;
@@ -32,13 +36,7 @@ class StagingBuffer final {
       Context* context_p,
       const vkapi::ScalarType dtype,
       const size_t numel,
-      const vkapi::CopyDirection direction)
-      : context_p_(context_p),
-        dtype_(dtype),
-        vulkan_buffer_(context_p_->adapter_ptr()->vma().create_staging_buffer(
-            element_size(dtype_) * numel,
-            direction)),
-        mapped_data_(nullptr) {}
+      const vkapi::CopyDirection direction);
 
   StagingBuffer(const StagingBuffer&) = delete;
   StagingBuffer& operator=(const StagingBuffer&) = delete;
@@ -92,6 +90,12 @@ class StagingBuffer final {
     }
   }
 
+  void cast_half_to_float_and_copy_from(
+      const uint16_t* src,
+      const size_t numel);
+
+  void cast_float_to_half_and_copy_to(uint16_t* dst, const size_t numel);
+
   inline void copy_to(void* dst, const size_t nbytes) {
     VK_CHECK_COND(nbytes <= this->nbytes());
     vmaInvalidateAllocation(
 
@@ -63,15 +63,25 @@ PackedDimInfo calculate_packed_dim_info(
 
 /*
  * For PackedInt8 memory layouts, ensure that the scalar type used for the
- * tensor is kInt8x4. Otherwise, return the original scalar type.
+ * tensor is kInt8x4.
+ *
+ * For kHalf dtype on devices that don't support float16 buffers, alias to
+ * kFloat.
+ *
+ * Otherwise, return the original scalar type.
+ *
+ * The packed int8 check is evaluated first, so the kHalf aliasing only applies
+ * to layouts that are not packed int8. `context` is only used to query the
+ * adapter for full float16 buffer support.
  */
 vkapi::ScalarType get_effective_scalar_type(
+    Context* const context,
     const vkapi::ScalarType dtype,
     const utils::GPUMemoryLayout memory_layout) {
   vkapi::ScalarType effective_dtype = dtype;
   if (utils::is_packed_int8_layout(memory_layout)) {
     VK_CHECK_COND(dtype == vkapi::kInt8x4 || dtype == vkapi::kChar);
     effective_dtype = vkapi::kInt8x4;
+  } else if (
+      dtype == vkapi::kHalf &&
+      !context->adapter_ptr()->has_full_float16_buffers_support()) {
+    effective_dtype = vkapi::kFloat;
   }
   return effective_dtype;
 }
@@ -726,7 +736,7 @@ vTensor::vTensor(
     const utils::GPUMemoryLayout memory_layout,
     const bool allocate_memory,
     const utils::AxisMapLayout axis_map_layout)
-    : dtype_(get_effective_scalar_type(dtype, memory_layout)),
+    : dtype_(get_effective_scalar_type(context, dtype, memory_layout)),
       packed_dim_info_(calculate_packed_dim_info(memory_layout, storage_type)),
       // Calculate tensor metadata
       sizes_(sizes.begin(), sizes.end()),
 
@@ -11,6 +11,8 @@
 
 #include <executorch/backends/vulkan/runtime/graph/ComputeGraph.h>
 
+#include <executorch/backends/vulkan/runtime/api/containers/StagingBuffer.h>
+
 #include <executorch/backends/vulkan/runtime/graph/ops/impl/Staging.h>
 
 #include <executorch/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h>
@@ -350,6 +352,11 @@ vkapi::ScalarType ComputeGraph::dtype_of(const ValueRef idx) const {
   VK_THROW("Could not get dtype of value with type ", val.type());
 }
 
+vkapi::ScalarType ComputeGraph::get_staging_dtype_for(
+    const ValueRef idx) const { // maps dtype_of(idx) to its staging dtype
+  return api::get_staging_dtype(context_.get(), dtype_of(idx));
+}
+
 bool ComputeGraph::is_contiguous_buffer_tensor(const ValueRef idx) const {
   if (!val_is_tensor(idx)) {
     return false;
@@ -923,6 +930,10 @@ void ComputeGraph::maybe_cast_and_copy_into_staging(
         src_data_dtype == vkapi::kDouble && staging_dtype == vkapi::kFloat) {
       const double* casted_data = reinterpret_cast<const double*>(data);
       staging->cast_and_copy_from<double, float>(casted_data, numel);
+    } else if (
+        src_data_dtype == vkapi::kHalf && staging_dtype == vkapi::kFloat) {
+      const uint16_t* casted_data = reinterpret_cast<const uint16_t*>(data);
+      staging->cast_half_to_float_and_copy_from(casted_data, numel);
     } else {
       VK_THROW(
           "Unsupported type conversion from ",
@@ -962,6 +973,10 @@ void ComputeGraph::maybe_cast_and_copy_from_staging(
         dst_data_dtype == vkapi::kDouble && staging_dtype == vkapi::kFloat) {
       double* casted_data = reinterpret_cast<double*>(data);
       staging->cast_and_copy_to<float, double>(casted_data, numel);
+    } else if (
+        dst_data_dtype == vkapi::kHalf && staging_dtype == vkapi::kFloat) {
+      uint16_t* casted_data = reinterpret_cast<uint16_t*>(data);
+      staging->cast_float_to_half_and_copy_to(casted_data, numel);
     } else {
       VK_THROW(
           "Unsupported type conversion from staging dtype ",
 
@@ -352,6 +352,8 @@ class ComputeGraph final {
 
   vkapi::ScalarType dtype_of(const ValueRef idx) const;
 
+  vkapi::ScalarType get_staging_dtype_for(const ValueRef idx) const;
+
   inline const utils::ivec3& logical_limits_of(const ValueRef idx) const {
     return values_.at(idx).toConstTensor().logical_limits();
   }
@@ -997,17 +999,19 @@ class ComputeGraph final {
   // Input/Output
   //
 
+ private:
   void
   copy_into_staging(const ValueRef idx, const void* data, const size_t numel);
 
+  void copy_from_staging(const ValueRef idx, void* data, const size_t numel);
+
+ public:
   void maybe_cast_and_copy_into_staging(
       const ValueRef idx,
       const void* data,
       const size_t numel,
       const vkapi::ScalarType src_data_dtype);
 
-  void copy_from_staging(const ValueRef idx, void* data, const size_t numel);
-
   void maybe_cast_and_copy_from_staging(
       const ValueRef idx,
       void* data,
@@ -1110,6 +1114,10 @@ class ComputeGraph final {
     return context_->adapter_ptr()->supports_int16_shader_types();
   }
 
+  inline bool float16_buffers_enabled() const { // forwards the adapter query
+    return context_->adapter_ptr()->has_full_float16_buffers_support();
+  }
+
   inline size_t execute_count() const {
     return execute_count_;
   }
 
@@ -70,7 +70,36 @@ api::StagingBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) {
       vkapi::CopyDirection::HOST_TO_DEVICE);
   graph->update_staging_nbytes_in_cmd(staging.buffer().mem_size_as_size_t());
   size_t nbytes = numel * vkapi::element_size(tref->dtype);
-  staging.copy_from(tref->data, nbytes);
+
+  // In some cases the staging dtype will diverge from the TensorRef dtype. The
+  // most common case for this is when the tensor data is float16, but the GPU
+  // does not support 16-bit storage buffers. In these cases, the tensor data
+  // is manually converted to the staging dtype.
+  vkapi::ScalarType staging_dtype = staging.dtype();
+  vkapi::ScalarType tref_dtype = tref->dtype;
+  if (staging_dtype == tref_dtype) {
+    staging.copy_from(tref->data, nbytes);
+  } else {
+    // Hard-coded type conversion cases
+    if (tref_dtype == vkapi::kHalf && staging_dtype == vkapi::kFloat) {
+      const uint16_t* casted_data =
+          reinterpret_cast<const uint16_t*>(tref->data);
+      staging.cast_half_to_float_and_copy_from(casted_data, numel);
+    } else if (tref_dtype == vkapi::kLong && staging_dtype == vkapi::kInt) {
+      const int64_t* casted_data = reinterpret_cast<const int64_t*>(tref->data);
+      staging.cast_and_copy_from<int64_t, int32_t>(casted_data, numel);
+    } else if (tref_dtype == vkapi::kDouble && staging_dtype == vkapi::kFloat) {
+      const double* casted_data = reinterpret_cast<const double*>(tref->data);
+      staging.cast_and_copy_from<double, float>(casted_data, numel);
+    } else {
+      VK_THROW(
+          "Unsupported type conversion from ",
+          tref_dtype,
+          " to staging dtype ",
+          staging_dtype);
+    }
+  }
+
   // Once the staging buffer is copied, if the TensorRef owns a FreeableBuffer,
   // it can be freed.
   tref->free_buffer();