tensorflow · veblush · Jun 18, 2026 · Jun 18, 2026
@@ -30,16 +30,17 @@ tensorflow/lite/core/c/common.h
 tensorflow/lite/core/macros.h
 tensorflow/lite/kernels/internal/common.h
 tensorflow/lite/kernels/internal/compatibility.h
-tensorflow/lite/kernels/internal/portable_tensor_utils.h
 tensorflow/lite/kernels/internal/portable_tensor_utils.cc
+tensorflow/lite/kernels/internal/portable_tensor_utils.h
 tensorflow/lite/kernels/internal/quantization_util.h
-tensorflow/lite/kernels/internal/reference/add.h
 tensorflow/lite/kernels/internal/reference/add_n.h
+tensorflow/lite/kernels/internal/reference/add.h
 tensorflow/lite/kernels/internal/reference/arg_min_max.h
-tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h
 tensorflow/lite/kernels/internal/reference/batch_matmul.h
+tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h
 tensorflow/lite/kernels/internal/reference/binary_function.h
 tensorflow/lite/kernels/internal/reference/broadcast_args.h
+tensorflow/lite/kernels/internal/reference/broadcast_loop.h
 tensorflow/lite/kernels/internal/reference/broadcast_to.h
 tensorflow/lite/kernels/internal/reference/ceil.h
 tensorflow/lite/kernels/internal/reference/comparisons.h
@@ -54,17 +55,17 @@ tensorflow/lite/kernels/internal/reference/div.h
 tensorflow/lite/kernels/internal/reference/elu.h
 tensorflow/lite/kernels/internal/reference/exp.h
 tensorflow/lite/kernels/internal/reference/fill.h
-tensorflow/lite/kernels/internal/reference/floor.h
 tensorflow/lite/kernels/internal/reference/floor_div.h
 tensorflow/lite/kernels/internal/reference/floor_mod.h
+tensorflow/lite/kernels/internal/reference/floor.h
 tensorflow/lite/kernels/internal/reference/fully_connected.h
 tensorflow/lite/kernels/internal/reference/hard_swish.h
 tensorflow/lite/kernels/internal/reference/integer_ops/add.h
 tensorflow/lite/kernels/internal/reference/integer_ops/conv.h
 tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h
 tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h
-tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h
 tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h
+tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h
 tensorflow/lite/kernels/internal/reference/integer_ops/mean.h
 tensorflow/lite/kernels/internal/reference/integer_ops/mul.h
 tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h
@@ -73,14 +74,16 @@ tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h
 tensorflow/lite/kernels/internal/reference/l2normalization.h
 tensorflow/lite/kernels/internal/reference/leaky_relu.h
 tensorflow/lite/kernels/internal/reference/log_softmax.h
+tensorflow/lite/kernels/internal/reference/logistic.h
+tensorflow/lite/kernels/internal/reference/lstm_cell.h
 tensorflow/lite/kernels/internal/reference/maximum_minimum.h
 tensorflow/lite/kernels/internal/reference/mul.h
 tensorflow/lite/kernels/internal/reference/neg.h
 tensorflow/lite/kernels/internal/reference/pad.h
 tensorflow/lite/kernels/internal/reference/pooling.h
+tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h
 tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc
 tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h
-tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h
 tensorflow/lite/kernels/internal/reference/prelu.h
 tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h
 tensorflow/lite/kernels/internal/reference/quantize.h
@@ -90,18 +93,16 @@ tensorflow/lite/kernels/internal/reference/resize_bilinear.h
 tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h
 tensorflow/lite/kernels/internal/reference/reverse.h
 tensorflow/lite/kernels/internal/reference/round.h
+tensorflow/lite/kernels/internal/reference/select.h
+tensorflow/lite/kernels/internal/reference/slice.h
 tensorflow/lite/kernels/internal/reference/softmax.h
 tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h
 tensorflow/lite/kernels/internal/reference/space_to_depth.h
-tensorflow/lite/kernels/internal/reference/sub.h
-tensorflow/lite/kernels/internal/reference/logistic.h
-tensorflow/lite/kernels/internal/reference/lstm_cell.h
-tensorflow/lite/kernels/internal/reference/select.h
-tensorflow/lite/kernels/internal/reference/slice.h
 tensorflow/lite/kernels/internal/reference/strided_slice.h
+tensorflow/lite/kernels/internal/reference/sub.h
 tensorflow/lite/kernels/internal/reference/tanh.h
-tensorflow/lite/kernels/internal/reference/transpose.h
 tensorflow/lite/kernels/internal/reference/transpose_conv.h
+tensorflow/lite/kernels/internal/reference/transpose.h
 tensorflow/lite/kernels/internal/cppmath.h
 tensorflow/lite/kernels/internal/max.h
 tensorflow/lite/kernels/internal/min.h

@@ -20,6 +20,7 @@ limitations under the License.
 #endif  // TF_LITE_STATIC_MEMORY
 
 #include <cstring>
+#include <new>
 #include <type_traits>
 #include <utility>
 
@@ -111,6 +112,7 @@ TfLiteSparsity TfLiteSparsityClone(const TfLiteSparsity& src) {
   if (src.dim_metadata) {
     dst.dim_metadata = reinterpret_cast<TfLiteDimensionMetadata*>(
         calloc(1, sizeof(TfLiteDimensionMetadata) * src.dim_metadata_size));
+    if (src.dim_metadata_size > 0 && !dst.dim_metadata) return TfLiteSparsity();
     for (int i = 0; i < src.dim_metadata_size; ++i) {
       dst.dim_metadata[i] = src.dim_metadata[i];
       dst.dim_metadata[i].array_segments =
@@ -129,6 +131,7 @@ TfLiteSparsity* TfLiteSparsityClone(const TfLiteSparsity* const src) {
   }
   TfLiteSparsity* dst =
       reinterpret_cast<TfLiteSparsity*>(calloc(1, sizeof(TfLiteSparsity)));
+  if (!dst) return nullptr;
   *dst = TfLiteSparsityClone(*src);
   return dst;
 }
@@ -147,6 +150,7 @@ TfLiteQuantization TfLiteQuantizationClone(const TfLiteQuantization& src) {
       break;
     case kTfLiteAffineQuantization: {
       dst.params = calloc(1, sizeof(TfLiteAffineQuantization));
+      if (!dst.params) return TfLiteQuantization();
       const TfLiteAffineQuantization* const src_params =
           reinterpret_cast<TfLiteAffineQuantization*>(src.params);
       TfLiteAffineQuantization* const dst_params =
@@ -158,6 +162,7 @@ TfLiteQuantization TfLiteQuantizationClone(const TfLiteQuantization& src) {
     }
     case kTfLiteBlockwiseQuantization: {
       dst.params = calloc(1, sizeof(TfLiteBlockwiseQuantization));
+      if (!dst.params) return TfLiteQuantization();
       const TfLiteBlockwiseQuantization* const src_params =
           (TfLiteBlockwiseQuantization*)(src.params);
       TfLiteBlockwiseQuantization* const dst_params =
@@ -219,6 +224,9 @@ TfLiteFloatArray* TfLiteFloatArrayCopy(const TfLiteFloatArray* src) {
 void TfLiteFloatArrayFree(TfLiteFloatArray* a) { TfLiteVarArrayFree(a); }
 
 void TfLiteTensorDataFree(TfLiteTensor* t) {
+  if (t == nullptr) {
+    return;
+  }
   if (t->allocation_type == kTfLiteVariantObject && t->data.data) {
     delete static_cast<VariantData*>(t->data.data);
   } else if (t->allocation_type == kTfLiteDynamic ||
@@ -238,6 +246,9 @@ void TfLiteTensorDataFree(TfLiteTensor* t) {
 }
 
 void TfLiteQuantizationFree(TfLiteQuantization* quantization) {
+  if (quantization == nullptr) {
+    return;
+  }
   if (quantization->type == kTfLiteAffineQuantization) {
     TfLiteAffineQuantization* q_params =
         reinterpret_cast<TfLiteAffineQuantization*>(quantization->params);
@@ -294,6 +305,9 @@ void TfLiteSparsityFree(TfLiteSparsity* sparsity) {
 }
 
 void TfLiteTensorFree(TfLiteTensor* t) {
+  if (t == nullptr) {
+    return;
+  }
   TfLiteTensorDataFree(t);
   if (t->dims) TfLiteIntArrayFree(t->dims);
   t->dims = nullptr;
@@ -308,7 +322,7 @@ void TfLiteTensorFree(TfLiteTensor* t) {
   t->sparsity = nullptr;
 }
 
-TfLiteTensor TfLiteTensorClone(const TfLiteTensor src) {
+TfLiteTensor TfLiteTensorClone(TfLiteTensor src) {
   // We copy all of the source data first, then we clone the fields that can't
   // be shared between two tensor instances.
   TfLiteTensor dst = src;
@@ -335,16 +349,18 @@ TfLiteTensor TfLiteTensorClone(const TfLiteTensor src) {
         break;
       case kTfLiteAllocationStrategyMalloc:
         dst.data.data = malloc(src.bytes);
+        if (src.bytes > 0 && !dst.data.data) return TfLiteTensor();
         std::memcpy(dst.data.data, src.data.data, src.bytes);
         break;
       case kTfLiteAllocationStrategyNew:
         // Special case for variant objects. They are allocated using new/delete
         // but require using the `CloneTo` function.
         if (src.allocation_type == kTfLiteVariantObject) {
-          dst.data.data = reinterpret_cast<const VariantData*>(src.data.data)
-                              ->CloneTo(nullptr);
+          dst.data.data =
+              static_cast<const VariantData*>(src.data.data)->CloneTo(nullptr);
         } else {
-          dst.data.data = new char[src.bytes];
+          dst.data.data = new (std::nothrow) char[src.bytes];
+          if (src.bytes > 0 && !dst.data.data) return TfLiteTensor();
           std::memcpy(dst.data.data, src.data.data, src.bytes);
         }
         break;
@@ -394,13 +410,21 @@ TfLiteStatus TfLiteTensorCopy(const TfLiteTensor* src, TfLiteTensor* dst) {
     }
     auto* dst_vd = static_cast<VariantData*>(dst->data.data);
     auto* src_vd = static_cast<VariantData*>(src->data.data);
+    if (!src_vd) return kTfLiteError;
 
     // `CloneTo` will handle the case when `dst_vd` is nullptr, so it is safe
     // to `CloneTo` something which was "freed". Also, returning from `CloneTo`
     // will implicitly cast to `VariantData`; don't need static cast here.
     dst->data.data = src_vd->CloneTo(dst_vd);
   } else {
-    memcpy(dst->data.raw, src->data.raw, src->bytes);
+    if (dst->allocation_type == kTfLiteVariantObject) {
+      TfLiteTensorDataFree(dst);
+      dst->allocation_type = src->allocation_type;
+    }
+    if (src->bytes > 0) {
+      if (!dst->data.raw || !src->data.raw) return kTfLiteError;
+      memcpy(dst->data.raw, src->data.raw, src->bytes);
+    }
   }
   dst->buffer_handle = src->buffer_handle;
   dst->data_is_stale = src->data_is_stale;

@@ -56,6 +56,7 @@ limitations under the License.
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
+#include <stdio.h>
 
 #include "tensorflow/lite/core/c/c_api_types.h"  // IWYU pragma: export
 
@@ -277,13 +278,34 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a);
     }                                                                        \
   } while (0)
 
-#define TF_LITE_ENSURE_OK(context, status) \
-  do {                                     \
-    const TfLiteStatus s = (status);       \
-    if ((s) != kTfLiteOk) {                \
-      return s;                            \
-    }                                      \
+#ifndef TF_LITE_STRIP_ERROR_STRINGS
+#define TF_LITE_VAR_ARG_HEAD(FIRST, ...) FIRST
+#define TF_LITE_STRINGIFY_HELPER(x) #x
+#define TF_LITE_STRINGIFY(x) TF_LITE_STRINGIFY_HELPER(x)
+// Checks that `status` evaluates to `kTfLiteOk`.
+//
+// Can take a printf-style log message and its parameters after the status. The
+// message is logged via `TF_LITE_MAYBE_KERNEL_LOG` if the status is not OK.
+#define TF_LITE_ENSURE_OK(context, status, ...)                              \
+  do {                                                                       \
+    const TfLiteStatus s = (status);                                         \
+    if (s != kTfLiteOk) {                                                    \
+      if (sizeof(TF_LITE_VAR_ARG_HEAD("" __VA_ARGS__)) > sizeof("")) {       \
+        TF_LITE_MAYBE_KERNEL_LOG((context), __FILE__ ":" TF_LITE_STRINGIFY(  \
+                                                __LINE__) ": " __VA_ARGS__); \
+      }                                                                      \
+      return s;                                                              \
+    }                                                                        \
   } while (0)
+#else
+#define TF_LITE_ENSURE_OK(context, status, ...) \
+  do {                                          \
+    const TfLiteStatus s = (status);            \
+    if ((s) != kTfLiteOk) {                     \
+      return s;                                 \
+    }                                           \
+  } while (0)
+#endif
 
 // `std::unreachable` not available until CC23.
 #ifdef __GNUC__  // GCC, Clang, ICC
@@ -1060,6 +1082,13 @@ typedef struct TfLiteContext {
   /// WARNING: This is an experimental interface that is subject to change.
   TfLiteStatus (*ReleaseSubgraphContext)(struct TfLiteContext* context,
                                          int subgraph_index);
+#if defined(_WIN32)
+  /// Create an array of a given `size` (uninitialized entries).
+  TfLiteIntArray* (*TfLiteIntArrayCreate)(int size);  // NOLINT
+
+  /// Free memory of array `a`.
+  void (*TfLiteIntArrayFree)(TfLiteIntArray* a);  // NOLINT
+#endif                                            // defined(_WIN32)
 } TfLiteContext;
 
 /// `TfLiteOperator` is an external version of `TfLiteRegistration`

@@ -146,6 +146,7 @@ cc_library(
     copts = tflite_copts(),
     deps = [
         ":compatibility",
+        "//tensorflow/lite/types:half",
     ],
 )
 

@@ -78,15 +78,23 @@ bool ReduceDimensionsForBroadcast(const RuntimeShape& input1_shape,
       if (!broadcast_input1) {
         broadcast_input1 = true;
         broadcast_input2 = false;
+        if (num_compressed_dims >= MAX_DIM) return false;
         num_compressed_dims++;
+        if (num_compressed_dims > MAX_DIM) {
+          return false;
+        }
       }
       compressed_input2_shape[num_compressed_dims - 1] *= input2_dim;
       compressed_output_shape[num_compressed_dims - 1] *= input2_dim;
     } else if (input2_dim == 1) {
       if (!broadcast_input2) {
         broadcast_input1 = false;
         broadcast_input2 = true;
+        if (num_compressed_dims >= MAX_DIM) return false;
         num_compressed_dims++;
+        if (num_compressed_dims > MAX_DIM) {
+          return false;
+        }
       }
       compressed_input1_shape[num_compressed_dims - 1] *= input1_dim;
       compressed_output_shape[num_compressed_dims - 1] *= input1_dim;
@@ -95,7 +103,11 @@ bool ReduceDimensionsForBroadcast(const RuntimeShape& input1_shape,
       if (broadcast_input1 || broadcast_input2 || first_nonunit) {
         broadcast_input1 = false;
         broadcast_input2 = false;
+        if (num_compressed_dims >= MAX_DIM) return false;
         num_compressed_dims++;
+        if (num_compressed_dims > MAX_DIM) {
+          return false;
+        }
       }
       compressed_input1_shape[num_compressed_dims - 1] *= input1_dim;
       compressed_input2_shape[num_compressed_dims - 1] *= input1_dim;
@@ -105,7 +117,11 @@ bool ReduceDimensionsForBroadcast(const RuntimeShape& input1_shape,
   }
   if (num_input1_dims > num_input2_dims) {
     if (!broadcast_input2) {
+      if (num_compressed_dims >= MAX_DIM) return false;
       num_compressed_dims++;
+      if (num_compressed_dims > MAX_DIM) {
+        return false;
+      }
     }
     for (size_t i = 0; i < num_input1_dims - num_input2_dims; i++) {
       const size_t input1_dim = input1_dims[i];
@@ -117,7 +133,11 @@ bool ReduceDimensionsForBroadcast(const RuntimeShape& input1_shape,
     }
   } else if (num_input2_dims > num_input1_dims) {
     if (!broadcast_input1) {
+      if (num_compressed_dims >= MAX_DIM) return false;
       num_compressed_dims++;
+      if (num_compressed_dims > MAX_DIM) {
+        return false;
+      }
     }
     for (size_t i = 0; i < num_input2_dims - num_input1_dims; i++) {
       const size_t input2_dim = input2_dims[i];

@@ -23,6 +23,7 @@ limitations under the License.
 #include "fixedpoint/fixedpoint.h"
 #include "tensorflow/lite/kernels/internal/common.h"
 #include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/kernels/internal/reference/broadcast_loop.h"
 
 namespace tflite {
 
@@ -39,7 +40,7 @@ inline void Add(const ArithmeticParams& params,
   const int flat_size =
       MatchingElementsSize(input1_shape, input2_shape, output_shape);
   for (int i = 0; i < flat_size; ++i) {
-    output_data[i] = ActivationFunctionWithMinMax(
+    output_data[i] = ActivationFunctionWithMinMax<T>(
         input1_data[i] + input2_data[i], activation_min, activation_max);
   }
 }
@@ -328,6 +329,20 @@ BroadcastAdd6DSlow(const ArithmeticParams& params,
   constexpr int kMaxBroadcastDim = 6;
   T activation_min, activation_max;
   GetActivationParams(params, &activation_min, &activation_max);
+  const int broadcast_rank = std::max(
+      output_shape.DimensionsCount(),
+      std::max(input1_shape.DimensionsCount(), input2_shape.DimensionsCount()));
+  if (broadcast_rank > kMaxBroadcastDim) {
+    ForEachBroadcastedElement(
+        input1_shape, input2_shape, output_shape,
+        [&](int output_index, int input1_index, int input2_index) {
+          output_data[output_index] = ActivationFunctionWithMinMax(
+              static_cast<T>(input1_data[input1_index] +
+                             input2_data[input2_index]),
+              activation_min, activation_max);
+        });
+    return;
+  }
 
   // In Tensorflow, the dimensions are canonically named (batch_number, row,
   // col, channel), with extents (batches, height, width, depth), with the
@@ -421,6 +436,19 @@ BroadcastAdd6DSlow(const ArithmeticParams& params,
                    const RuntimeShape& input2_shape, const T* input2_data,
                    const RuntimeShape& output_shape, T* output_data) {
   constexpr int kMaxBroadcastDim = 6;
+  const int broadcast_rank = std::max(
+      output_shape.DimensionsCount(),
+      std::max(input1_shape.DimensionsCount(), input2_shape.DimensionsCount()));
+  if (broadcast_rank > kMaxBroadcastDim) {
+    ForEachBroadcastedElement(
+        input1_shape, input2_shape, output_shape,
+        [&](int output_index, int input1_index, int input2_index) {
+          AddElementwise(1, params, input1_data + input1_index,
+                         input2_data + input2_index,
+                         output_data + output_index);
+        });
+    return;
+  }
 
   // In Tensorflow, the dimensions are canonically named (batch_number, row,
   // col, channel), with extents (batches, height, width, depth), with the