diff --git a/paddle/phi/common/data_type.h b/paddle/phi/common/data_type.h
index 9584709067854..985f51c6d0ce4 100644
--- a/paddle/phi/common/data_type.h
+++ b/paddle/phi/common/data_type.h
@@ -328,6 +328,20 @@ inline DataType StringToDataType(const std::string& dtype) {
   }
 }
 
+inline bool IsFloatingType(const DataType& type) {  // FP8/FP16/BF16/FP32/FP64
+  return type == DataType::FLOAT8_E4M3FN || type == DataType::FLOAT8_E5M2 ||
+         type == DataType::BFLOAT16 || type == DataType::FLOAT16 ||
+         type == DataType::FLOAT32 || type == DataType::FLOAT64;
+}
+
+inline bool IsIntegerType(const DataType& type) {  // (u)int8-64 plus BOOL
+  return type == DataType::BOOL || type == DataType::UINT8 ||
+         type == DataType::INT8 || type == DataType::UINT16 ||
+         type == DataType::INT16 || type == DataType::UINT32 ||
+         type == DataType::INT32 || type == DataType::UINT64 ||
+         type == DataType::INT64;
+}
+
 }  // namespace phi
 
 namespace paddle {
diff --git a/paddle/phi/infermeta/ternary.cc b/paddle/phi/infermeta/ternary.cc
index c7bdf0ad18de4..a7fac52114130 100644
--- a/paddle/phi/infermeta/ternary.cc
+++ b/paddle/phi/infermeta/ternary.cc
@@ -882,9 +882,61 @@ void FlashAttnV3VarlenInferMeta(const MetaTensor& q,
   softmax_lse->set_dtype(DataType::FLOAT32);
 }
 
+void ArangeTensorInferMetaLegacy(const MetaTensor& start,
+                                 const MetaTensor& end,
+                                 const MetaTensor& step,
+                                 MetaTensor* out) {
+  const auto start_numel = common::product(start.dims());
+  PADDLE_ENFORCE_EQ(start_numel,
+                    1,
+                    common::errors::InvalidArgument(
+                        "The numel of Input(start) should be 1, but got %d",
+                        start_numel));
+  const auto end_numel = common::product(end.dims());
+  PADDLE_ENFORCE_EQ(end_numel,
+                    1,
+                    common::errors::InvalidArgument(
+                        "The numel of Input(end) should be 1, but got %d",
+                        end_numel));
+  const auto step_numel = common::product(step.dims());
+  PADDLE_ENFORCE_EQ(step_numel,
+                    1,
+                    common::errors::InvalidArgument(
+                        "The numel of Input(step) should be 1, but got %d",
+                        step_numel));
+  out->set_dims({-1});            // 1-D output; length left dynamic.
+  out->set_dtype(start.dtype());  // Legacy rule: dtype follows `start`.
+}
+
+void RangeTensorInferMetaLegacy(const MetaTensor& start,
+                                const MetaTensor& end,
+                                const MetaTensor& step,
+                                MetaTensor* out) {
+  const auto start_numel = common::product(start.dims());
+  PADDLE_ENFORCE_EQ(start_numel,
+                    1,
+                    common::errors::InvalidArgument(
+                        "The numel of Input(start) should be 1, but got %d",
+                        start_numel));
+  const auto end_numel = common::product(end.dims());
+  PADDLE_ENFORCE_EQ(end_numel,
+                    1,
+                    common::errors::InvalidArgument(
+                        "The numel of Input(end) should be 1, but got %d",
+                        end_numel));
+  const auto step_numel = common::product(step.dims());
+  PADDLE_ENFORCE_EQ(step_numel,
+                    1,
+                    common::errors::InvalidArgument(
+                        "The numel of Input(step) should be 1, but got %d",
+                        step_numel));
+  out->set_dims({-1});            // 1-D output; length left dynamic.
+  out->set_dtype(start.dtype());  // Legacy rule: dtype follows `start`.
+}
 void ArangeTensorInferMeta(const MetaTensor& start,
                            const MetaTensor& end,
                            const MetaTensor& step,
+                           DataType dtype,
                            MetaTensor* out) {
   PADDLE_ENFORCE_EQ(common::product(start.dims()),
                     1,
@@ -905,12 +957,13 @@ void ArangeTensorInferMeta(const MetaTensor& start,
                         common::product(step.dims())));
 
   out->set_dims({-1});
-  out->set_dtype(start.dtype());
+  out->set_dtype(dtype);
 }
 
 void RangeTensorInferMeta(const MetaTensor& start,
                           const MetaTensor& end,
                           const MetaTensor& step,
+                          DataType dtype,
                           MetaTensor* out) {
   PADDLE_ENFORCE_EQ(common::product(start.dims()),
                     1,
@@ -931,7 +984,7 @@ void RangeTensorInferMeta(const MetaTensor& start,
                         common::product(step.dims())));
 
   out->set_dims({-1});
-  out->set_dtype(start.dtype());
+  out->set_dtype(dtype);
 }
 
 void CollectFpnProposalsInferMeta(
diff --git a/paddle/phi/infermeta/ternary.h b/paddle/phi/infermeta/ternary.h
index d2060d7040742..964464adf4518 100644
--- a/paddle/phi/infermeta/ternary.h
+++ b/paddle/phi/infermeta/ternary.h
@@ -65,13 +65,25 @@ PADDLE_API void AffineChannelInferMeta(const MetaTensor& x,
 PADDLE_API void ArangeTensorInferMeta(const MetaTensor& start,
                                       const MetaTensor& end,
                                       const MetaTensor& step,
+                                      DataType dtype,
                                       MetaTensor* out);
 
 PADDLE_API void RangeTensorInferMeta(const MetaTensor& start,
                                      const MetaTensor& end,
                                      const MetaTensor& step,
+                                     DataType dtype,
                                      MetaTensor* out);
 
+PADDLE_API void ArangeTensorInferMetaLegacy(const MetaTensor& start,
+                                            const MetaTensor& end,
+                                            const MetaTensor& step,
+                                            MetaTensor* out);
+
+PADDLE_API void RangeTensorInferMetaLegacy(const MetaTensor& start,
+                                           const MetaTensor& end,
+                                           const MetaTensor& step,
+                                           MetaTensor* out);
+
 PADDLE_API void AssignPosInferMeta(const MetaTensor& x,
                                    const MetaTensor& cum_count,
                                    const MetaTensor& eff_num_len,
diff --git a/paddle/phi/kernels/cpu/arange_kernel.cc b/paddle/phi/kernels/cpu/arange_kernel.cc
index 4e5cec7286866..0626349b033b0 100644
--- a/paddle/phi/kernels/cpu/arange_kernel.cc
+++ b/paddle/phi/kernels/cpu/arange_kernel.cc
@@ -15,6 +15,7 @@ limitations under the License. */
 #include "paddle/phi/kernels/arange_kernel.h"
 
 #include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/range_function.h"
 
@@ -43,10 +44,38 @@ void ArangeTensorKernel(const Context& dev_ctx,
                         const DenseTensor& end,
                         const DenseTensor& step,
                         DenseTensor* out) {
-  T start_value = start.data<T>()[0];
-  T end_value = end.data<T>()[0];
-  T step_value = step.data<T>()[0];
-  ArangeFunc<T, Context>(dev_ctx, start_value, end_value, step_value, out);
+  int64_t size = 0;
+  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
+
+  bool any_float = phi::IsFloatingType(start.dtype()) ||
+                   phi::IsFloatingType(end.dtype()) ||
+                   phi::IsFloatingType(step.dtype());
+
+  Scalar start_scalar(start);
+  Scalar end_scalar(end);
+  Scalar step_scalar(step);
+
+  if (any_float) {
+    double sv = start_scalar.to<double>();
+    double ev = end_scalar.to<double>();
+    double stv = step_scalar.to<double>();
+    funcs::GetSize<double>(sv, ev, stv, &size);
+  } else {
+    int64_t sv = start_scalar.to<int64_t>();
+    int64_t ev = end_scalar.to<int64_t>();
+    int64_t stv = step_scalar.to<int64_t>();
+    funcs::GetSize<int64_t>(sv, ev, stv, &size);
+  }
+  MPType start_value = start_scalar.to<MPType>();
+  MPType step_value = step_scalar.to<MPType>();
+
+  out->Resize({size});
+  T* out_data = dev_ctx.template Alloc<T>(out);
+  MPType value = start_value;
+  for (int64_t i = 0; i < size; ++i) {
+    out_data[i] = static_cast<T>(value);
+    value += step_value;
+  }
 }
 
 template <typename T, typename Context>
@@ -55,10 +84,31 @@ void ArangeKernel(const Context& dev_ctx,
                   const Scalar& end,
                   const Scalar& step,
                   DenseTensor* out) {
-  T start_value = start.to<T>();
-  T end_value = end.to<T>();
-  T step_value = step.to<T>();
-  ArangeFunc<T, Context>(dev_ctx, start_value, end_value, step_value, out);
+  bool any_float = phi::IsFloatingType(start.dtype()) ||
+                   phi::IsFloatingType(end.dtype()) ||
+                   phi::IsFloatingType(step.dtype());
+  int64_t size = 0;
+  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
+  if (any_float) {
+    double sv = start.to<double>();
+    double ev = end.to<double>();
+    double stv = step.to<double>();
+    funcs::GetSize<double>(sv, ev, stv, &size);
+  } else {
+    int64_t sv = start.to<int64_t>();
+    int64_t ev = end.to<int64_t>();
+    int64_t stv = step.to<int64_t>();
+    funcs::GetSize<int64_t>(sv, ev, stv, &size);
+  }
+  MPType start_value = start.to<MPType>();
+  MPType step_value = step.to<MPType>();
+  out->Resize({size});
+  T* out_data = dev_ctx.template Alloc<T>(out);
+  MPType value = start_value;
+  for (int64_t i = 0; i < size; ++i) {
+    out_data[i] = static_cast<T>(value);
+    value += step_value;
+  }
 }
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/cpu/range_kernel.cc b/paddle/phi/kernels/cpu/range_kernel.cc
index e664c73cf6d95..866fe8dd3270c 100644
--- a/paddle/phi/kernels/cpu/range_kernel.cc
+++ b/paddle/phi/kernels/cpu/range_kernel.cc
@@ -15,6 +15,7 @@ limitations under the License. */
 #include "paddle/phi/kernels/range_kernel.h"
 
 #include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/range_function.h"
 
@@ -46,13 +47,27 @@ void RangeTensorKernel(const Context& dev_ctx,
                        const DenseTensor& end,
                        const DenseTensor& step,
                        DenseTensor* out) {
-  T start_value = start.data<T>()[0];
-  T end_value = end.data<T>()[0];
-  T step_value = step.data<T>()[0];
-  if (step_value == static_cast<T>(0)) {
-    PADDLE_THROW(errors::InvalidArgument("step must be nonzero."));
+  int64_t size = 0;
+  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
+  Scalar start_scalar(start);
+  Scalar end_scalar(end);
+  Scalar step_scalar(step);
+  MPType start_value = start_scalar.to<MPType>();
+  MPType end_value = end_scalar.to<MPType>();
+  MPType step_value = step_scalar.to<MPType>();
+
+  funcs::GetSizeForRange(start_value, end_value, step_value, &size);
+
+  out->Resize({size});
+  T* out_data = dev_ctx.template Alloc<T>(out);
+  if (size == 0) {
+    return;
+  }
+  MPType value = start_value;
+  for (int64_t i = 0; i < size; ++i) {
+    out_data[i] = static_cast<T>(value);
+    value += step_value;
   }
-  RangeFunc<T, Context>(dev_ctx, start_value, end_value, step_value, out);
 }
 
 template <typename T, typename Context>
@@ -61,16 +76,22 @@ void RangeKernel(const Context& dev_ctx,
                  const Scalar& end,
                  const Scalar& step,
                  DenseTensor* out) {
-  T start_value = start.to<T>();
-  T end_value = end.to<T>();
-  T step_value = step.to<T>();
-  if constexpr (std::is_floating_point_v<T>) {
-    if (std::isnan(end_value)) {
-      PADDLE_THROW(common::errors::InvalidArgument(
-          "The end value of range cannot be NaN. Please check your input."));
-    }
+  int64_t size = 0;
+  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
+  MPType start_value = start.to<MPType>();
+  MPType end_value = end.to<MPType>();
+  MPType step_value = step.to<MPType>();
+  funcs::GetSizeForRange(start_value, end_value, step_value, &size);
+  out->Resize({size});
+  T* out_data = dev_ctx.template Alloc<T>(out);
+  if (size == 0) {
+    return;
+  }
+  MPType value = start_value;
+  for (int64_t i = 0; i < size; ++i) {
+    out_data[i] = static_cast<T>(value);
+    value += step_value;
   }
-  RangeFunc<T, Context>(dev_ctx, start_value, end_value, step_value, out);
 }
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/funcs/range_function.h b/paddle/phi/kernels/funcs/range_function.h
index b0aa8e8556ba0..f9b79bbb2ffa6 100644
--- a/paddle/phi/kernels/funcs/range_function.h
+++ b/paddle/phi/kernels/funcs/range_function.h
@@ -13,6 +13,8 @@
 // limitations under the License.
 
 #pragma once
+#include <cmath>
+#include <type_traits>
 #include "paddle/phi/core/enforce.h"
 
 namespace phi {
@@ -61,5 +63,37 @@ void GetSize(T start, T end, T step, int64_t* size) {
               : std::ceil(std::abs((end - start) / step));
 }
 
+// Computes the element count of the closed range [start, end] walked with
+// `step` and stores it in *size. Rejects a zero step and, for floating-point
+// T, any non-finite start/end/step (casting NaN/Inf to int64_t is undefined).
+template <typename T>
+void GetSizeForRange(T start, T end, T step, int64_t* size) {
+  PADDLE_ENFORCE_NE(
+      step,
+      0,
+      common::errors::InvalidArgument("The step of range op should not be 0."));
+
+  if constexpr (std::is_same_v<T, phi::bfloat16> ||
+                std::is_same_v<T, phi::float16> ||
+                std::is_floating_point_v<T>) {
+    // Widening to double keeps NaN/Inf intact, so one check serves every
+    // floating-point T, including the 16-bit types.
+    const double start_v = static_cast<double>(start);
+    const double end_v = static_cast<double>(end);
+    const double step_v = static_cast<double>(step);
+    PADDLE_ENFORCE_EQ(
+        std::isfinite(start_v) && std::isfinite(end_v) && std::isfinite(step_v),
+        true,
+        common::errors::InvalidArgument(
+            "The start, end and step of range op should be finite "
+            "numbers, but received start=%f, end=%f, step=%f.",
+            start_v,
+            end_v,
+            step_v));
+  }
+  // Both endpoints are included, hence the + 1.
+  *size = static_cast<int64_t>(((end - start) / step) + 1);
+}
+
 }  // namespace funcs
 }  // namespace phi
diff --git a/paddle/phi/kernels/gpu/arange_kernel.cu b/paddle/phi/kernels/gpu/arange_kernel.cu
index e0fd49d1898b8..8ce3263e8cf21 100644
--- a/paddle/phi/kernels/gpu/arange_kernel.cu
+++ b/paddle/phi/kernels/gpu/arange_kernel.cu
@@ -20,6 +20,7 @@
 #include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/tensor_utils.h"
+#include "paddle/phi/core/visit_type.h"
 #include "paddle/phi/kernels/funcs/range_function.h"
 
 namespace phi {
@@ -37,15 +38,29 @@ void ArangeTensorKernel(const Context& dev_ctx,
                         const DenseTensor& end,
                         const DenseTensor& step,
                         DenseTensor* out) {
+  bool any_float = phi::IsFloatingType(start.dtype()) ||
+                   phi::IsFloatingType(end.dtype()) ||
+                   phi::IsFloatingType(step.dtype());
+  int64_t size = 0;
   using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
-  MPType start_value =
-      static_cast<MPType>(GetValue<T, Context>(dev_ctx, start));
-  MPType end_value = static_cast<MPType>(GetValue<T, Context>(dev_ctx, end));
-  MPType step_value = static_cast<MPType>(GetValue<T, Context>(dev_ctx, step));
+  Scalar start_scalar(start);
+  Scalar end_scalar(end);
+  Scalar step_scalar(step);
+  if (any_float) {
+    double sv = start_scalar.to<double>();
+    double ev = end_scalar.to<double>();
+    double stv = step_scalar.to<double>();
+    funcs::GetSize<double>(sv, ev, stv, &size);
+  } else {
+    int64_t sv = start_scalar.to<int64_t>();
+    int64_t ev = end_scalar.to<int64_t>();
+    int64_t stv = step_scalar.to<int64_t>();
+    funcs::GetSize<int64_t>(sv, ev, stv, &size);
+  }
+  MPType start_value = start_scalar.to<MPType>();
+  MPType step_value = step_scalar.to<MPType>();
 
-  int64_t size = 0;
-  funcs::GetSize(start_value, end_value, step_value, &size);
-  out->Resize(common::make_ddim({size}));
+  out->Resize({size});
   T* out_data = dev_ctx.template Alloc<T>(out);
 
   auto stream = dev_ctx.stream();
@@ -89,11 +104,35 @@ void ArangeKernel(const Context& dev_ctx,
                   const Scalar& end,
                   const Scalar& step,
                   DenseTensor* out) {
-  T start_value = start.to<T>();
-  T end_value = end.to<T>();
-  T step_value = step.to<T>();
-  ArangeNullaryKernel<T, Context>(
-      dev_ctx, start_value, end_value, step_value, out);
+  bool is_floating = phi::IsFloatingType(start.dtype()) ||
+                     phi::IsFloatingType(end.dtype()) ||
+                     phi::IsFloatingType(step.dtype());
+  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
+  int64_t size = 0;
+  if (is_floating) {
+    double sv = start.to<double>();
+    double ev = end.to<double>();
+    double stv = step.to<double>();
+    funcs::GetSize<double>(sv, ev, stv, &size);
+  } else {
+    int64_t sv = start.to<int64_t>();
+    int64_t ev = end.to<int64_t>();
+    int64_t stv = step.to<int64_t>();
+    funcs::GetSize<int64_t>(sv, ev, stv, &size);
+  }
+  MPType start_value = start.to<MPType>();
+  MPType step_value = step.to<MPType>();
+  out->Resize({size});
+  T* out_data = dev_ctx.template Alloc<T>(out);
+
+  auto stream = dev_ctx.stream();
+  int64_t block = std::min(size, static_cast<int64_t>(256));
+  if (block == 0) {
+    return;
+  }
+  int64_t grid = (size + block - 1) / block;
+  Range<MPType, T>
+      <<<grid, block, 0, stream>>>(start_value, step_value, size, out_data);
 }
 
 template decltype(ArangeNullaryKernel<int64_t, phi::GPUContext>)
diff --git a/paddle/phi/kernels/gpu/range_kernel.cu b/paddle/phi/kernels/gpu/range_kernel.cu
index bb90516ed2b8a..baffda060a761 100644
--- a/paddle/phi/kernels/gpu/range_kernel.cu
+++ b/paddle/phi/kernels/gpu/range_kernel.cu
@@ -14,12 +14,16 @@
 
 #include "paddle/phi/kernels/range_kernel.h"
 
+#include <type_traits>
+
 #include "paddle/common/errors.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/common/amp_type_traits.h"
+#include "paddle/phi/common/data_type.h"
 #include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/tensor_utils.h"
+#include "paddle/phi/core/visit_type.h"
 #include "paddle/phi/kernels/funcs/range_function.h"
 
 namespace phi {
@@ -37,17 +41,18 @@ void RangeTensorKernel(const Context& dev_ctx,
                        const DenseTensor& end,
                        const DenseTensor& step,
                        DenseTensor* out) {
+  int64_t size = 0;
   using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
-  MPType start_value =
-      static_cast<MPType>(GetValue<T, Context>(dev_ctx, start));
-  MPType end_value = static_cast<MPType>(GetValue<T, Context>(dev_ctx, end));
-  MPType step_value = static_cast<MPType>(GetValue<T, Context>(dev_ctx, step));
-  if (step_value == static_cast<MPType>(0)) {
-    PADDLE_THROW(common::errors::InvalidArgument("step must be nonzero."));
-  }
-  int64_t size =
-      static_cast<int64_t>(((end_value - start_value) / step_value) + 1);
-  out->Resize(common::make_ddim({size}));
+  Scalar start_scalar(start);
+  Scalar end_scalar(end);
+  Scalar step_scalar(step);
+
+  MPType start_value = start_scalar.to<MPType>();
+  MPType end_value = end_scalar.to<MPType>();
+  MPType step_value = step_scalar.to<MPType>();
+  funcs::GetSizeForRange<MPType>(start_value, end_value, step_value, &size);
+
+  out->Resize({size});
   T* out_data = dev_ctx.template Alloc<T>(out);
 
   auto stream = dev_ctx.stream();
@@ -108,25 +113,26 @@ void RangeKernel(const Context& dev_ctx,
                  const Scalar& end,
                  const Scalar& step,
                  DenseTensor* out) {
-  T start_value = start.to<T>();
-  T end_value = end.to<T>();
-  T step_value = step.to<T>();
-  if constexpr (std::is_same_v<T, float>) {
-    if (std::isnan(end_value)) {
-      PADDLE_THROW(common::errors::InvalidArgument(
-          "The end value of range cannot be NaN. Please check your input."));
-    }
-  } else if constexpr (std::is_same_v<T, double>) {
-    if (std::isnan(end_value)) {
-      PADDLE_THROW(common::errors::InvalidArgument(
-          "The end value of range cannot be NaN. Please check your input."));
-    }
+  int64_t size = 0;
+  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
+  MPType start_value = start.to<MPType>();
+  MPType end_value = end.to<MPType>();
+  MPType step_value = step.to<MPType>();
+  funcs::GetSizeForRange<MPType>(start_value, end_value, step_value, &size);
+  out->Resize({size});
+  T* out_data = dev_ctx.template Alloc<T>(out);
+  if (size == 0) {
+    return;
   }
-  if (step_value == static_cast<T>(0)) {
-    PADDLE_THROW(common::errors::InvalidArgument("step must be nonzero."));
+
+  auto stream = dev_ctx.stream();
+  int64_t block = std::min(size, static_cast<int64_t>(256));
+  if (block == 0) {
+    return;
   }
-  RangeNullaryKernel<T, Context>(
-      dev_ctx, start_value, end_value, step_value, out);
+  int64_t grid = (size + block - 1) / block;
+  Range<MPType, T>
+      <<<grid, block, 0, stream>>>(start_value, step_value, size, out_data);
 }
 
 template decltype(RangeNullaryKernel<int64_t, phi::GPUContext>)
diff --git a/paddle/phi/kernels/xpu/arange_kernel.cc b/paddle/phi/kernels/xpu/arange_kernel.cc
index 2e580adc4ffea..69de5aeed65ee 100644
--- a/paddle/phi/kernels/xpu/arange_kernel.cc
+++ b/paddle/phi/kernels/xpu/arange_kernel.cc
@@ -16,6 +16,7 @@ limitations under the License. */
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/core/visit_type.h"
 #include "paddle/phi/kernels/funcs/range_function.h"
 
 namespace phi {
@@ -26,15 +27,32 @@ void ArangeTensorKernel(const Context& dev_ctx,
                         const DenseTensor& end,
                         const DenseTensor& step,
                         DenseTensor* out) {
-  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
+  bool any_float = phi::IsFloatingType(start.dtype()) ||
+                   phi::IsFloatingType(end.dtype()) ||
+                   phi::IsFloatingType(step.dtype());
+  int64_t size = 0;
   using XPUType = typename XPUTypeTrait<T>::Type;
-  MPType start_value =
-      static_cast<MPType>(GetValue<T, Context>(dev_ctx, start));
-  MPType end_value = static_cast<MPType>(GetValue<T, Context>(dev_ctx, end));
-  MPType step_value = static_cast<MPType>(GetValue<T, Context>(dev_ctx, step));
+  Scalar start_scalar(start);
+  Scalar end_scalar(end);
+  Scalar step_scalar(step);
+  XPUType start_value;
+  XPUType step_value;
 
-  int64_t size = 0;
-  phi::funcs::GetSize(start_value, end_value, step_value, &size);
+  if (any_float) {
+    double sv = start_scalar.to<double>();
+    double ev = end_scalar.to<double>();
+    double stv = step_scalar.to<double>();
+    funcs::GetSize<double>(sv, ev, stv, &size);
+    start_value = static_cast<XPUType>(sv);
+    step_value = static_cast<XPUType>(stv);
+  } else {
+    int64_t sv = start_scalar.to<int64_t>();
+    int64_t ev = end_scalar.to<int64_t>();
+    int64_t stv = step_scalar.to<int64_t>();
+    funcs::GetSize<int64_t>(sv, ev, stv, &size);
+    start_value = static_cast<XPUType>(sv);
+    step_value = static_cast<XPUType>(stv);
+  }
   if (size == 0) {
     out->Resize(common::make_ddim({0}));
     dev_ctx.template Alloc<T>(out);
@@ -44,11 +62,8 @@ void ArangeTensorKernel(const Context& dev_ctx,
   XPUType* out_data =
       reinterpret_cast<XPUType*>(dev_ctx.template Alloc<T>(out));
 
-  int ret = xpu::range(dev_ctx.x_context(),
-                       out_data,
-                       static_cast<XPUType>(start_value),
-                       static_cast<XPUType>(step_value),
-                       size);
+  int ret =
+      xpu::range(dev_ctx.x_context(), out_data, start_value, step_value, size);
   PADDLE_ENFORCE_XDNN_SUCCESS(ret, "range");
 }
 
diff --git a/paddle/phi/ops/yaml/inconsistent/dygraph_ops.yaml b/paddle/phi/ops/yaml/inconsistent/dygraph_ops.yaml
index 5514ee86abbea..14e22c95c4502 100755
--- a/paddle/phi/ops/yaml/inconsistent/dygraph_ops.yaml
+++ b/paddle/phi/ops/yaml/inconsistent/dygraph_ops.yaml
@@ -24,14 +24,12 @@
   output : Tensor(out)
   infer_meta :
     func : ArangeTensorInferMeta
-    param : [start, end, step]
+    param : [start, end, step, dtype]
   kernel :
     func : arange_tensor
     param : [start, end, step]
     data_type : dtype
     backend : place
-  data_transform :
-    support_trans_dtype : start, end, step
   traits : paddle::dialect::ForwardOnlyTrait
 
 - op : assign
@@ -316,14 +314,12 @@
   output : Tensor(out)
   infer_meta :
     func : RangeTensorInferMeta
-    param : [start, end, step]
+    param : [start, end, step, dtype]
   kernel :
     func : range_tensor
     param : [start, end, step]
     data_type : dtype
     backend : place
-  data_transform :
-    support_trans_dtype : start, end, step
   traits : paddle::dialect::ForwardOnlyTrait
 
 - op : remainder
diff --git a/paddle/phi/ops/yaml/legacy/static_ops.yaml b/paddle/phi/ops/yaml/legacy/static_ops.yaml
index 8b2eabf078c38..0983caf647f8d 100755
--- a/paddle/phi/ops/yaml/legacy/static_ops.yaml
+++ b/paddle/phi/ops/yaml/legacy/static_ops.yaml
@@ -44,7 +44,7 @@
   args : (Tensor start, Tensor end, Tensor step)
   output : Tensor(out)
   infer_meta :
-    func : ArangeTensorInferMeta
+    func : ArangeTensorInferMetaLegacy
   kernel :
     func : arange_tensor
   data_transform :
@@ -900,7 +900,7 @@
   args : (Tensor start, Tensor end, Tensor step)
   output : Tensor(out)
   infer_meta :
-    func : RangeTensorInferMeta
+    func : RangeTensorInferMetaLegacy
   kernel :
     func : range_tensor
   data_transform :
diff --git a/python/paddle/jit/dy2static/convert_operators.py b/python/paddle/jit/dy2static/convert_operators.py
index 0ac1bdc883f69..113909aee1df3 100644
--- a/python/paddle/jit/dy2static/convert_operators.py
+++ b/python/paddle/jit/dy2static/convert_operators.py
@@ -702,6 +702,30 @@ def convert_enumerate(*args):
 
 def convert_range(*args):
     has_variable = any(isinstance(x, (Variable, Value)) for x in args)
+    # NOTE(SigureMo): Add an `Assign` OP after the Tensor input to mark it as a variable, which can
+    # avoid confusing it with the scalar case in `arange` API.
+    # For example:
+    # ```python
+    # l = []
+    # for i in range(n):
+    #    l.append(i)
+    # ```
+    # - If `n` is a scalar (e.g., `n=10`), we expect to create an `ArangeOp` with a fixed output shape [10].
+    # - If `n` is a Tensor (e.g., `n=full([], 10, "int32")`), we expect to create an `ArangeOp` with a dynamic
+    # output shape [-1], so that both the Python level and the graph level recognize this as data-dependent
+    # control flow.
+    # However, the scalar case and the Tensor case cannot be told apart when the `ArangeOp` is created, because
+    # a scalar is also converted into the output of a `Full` OP.
+    # So we add an `Assign` OP after the Tensor input to **mark** it as a variable, which avoids confusing
+    # it with the scalar case.
+    is_full_op_output = lambda x: (
+        isinstance(x, Value)
+        and x.get_defining_op()
+        and x.get_defining_op().name() == "pd_op.full"
+    )
+    args = [
+        paddle.assign(arg) if is_full_op_output(arg) else arg for arg in args
+    ]
     if has_variable:
         if len(args) == 1:
             return paddle.arange(0, args[0], 1, "int64")
diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py
index 6e16bac2914e0..d64476686c62d 100644
--- a/python/paddle/tensor/creation.py
+++ b/python/paddle/tensor/creation.py
@@ -19,7 +19,7 @@
 import numbers
 import re
 import warnings
-from typing import TYPE_CHECKING, overload
+from typing import TYPE_CHECKING, Any, overload
 
 import numpy as np
 
@@ -2223,20 +2223,19 @@ def arange(
         end = start
         start = 0
 
+    is_all_integer = True
     if dtype is None:
+        # Check if start/end/step contain floating point values
         for val in [start, end, step]:
             if isinstance(val, (Variable, paddle.pir.Value)):
                 if not paddle.is_integer(val):
-                    dtype = paddle.get_default_dtype()
+                    is_all_integer = False
                     break
-                else:
-                    dtype = 'int64'
             else:
-                if not isinstance(val, np.integer) and not isinstance(val, int):
-                    dtype = paddle.get_default_dtype()
+                if isinstance(val, (float, np.floating)):
+                    is_all_integer = False
                     break
-                else:
-                    dtype = 'int64'
+        dtype = 'int64' if is_all_integer else paddle.get_default_dtype()
 
     out_shape = None
     is_value_input = (
@@ -2300,13 +2299,13 @@ def arange(
                 raise ValueError(
                     f"The value of start must be finite, but received: {start}."
                 )
-            start = fill_constant([1], dtype, start, force_cpu=True)
-    elif start.dtype != dtype:
+            start_dtype = np.array(start).dtype
+            start = fill_constant([1], start_dtype, start, force_cpu=True)
+    else:
         if in_dynamic_mode() and not paddle.isfinite(start):
             raise ValueError(
                 f"The value of start must be finite, but received: {start}."
             )
-        start = paddle.cast(start, dtype)
 
     if not isinstance(end, (Variable, paddle.pir.Value)):
         with device_guard("cpu"):
@@ -2314,19 +2313,23 @@ def arange(
                 raise ValueError(
                     f"The value of end must be finite, but received: {end}."
                 )
-            end = fill_constant([1], dtype, end, force_cpu=True)
-    elif end.dtype != dtype:
+            end_dtype = np.array(end).dtype
+            end = fill_constant([1], end_dtype, end, force_cpu=True)
+    else:
         if in_dynamic_mode() and not paddle.isfinite(end):
             raise ValueError(
                 f"The value of end must be finite, but received: {end}."
             )
-        end = paddle.cast(end, dtype)
 
     if not isinstance(step, (Variable, paddle.pir.Value)):
         with device_guard("cpu"):
-            step = fill_constant([1], dtype, step, force_cpu=True)
-    elif step.dtype != dtype:
-        step = paddle.cast(step, dtype)
+            step_dtype = np.array(step).dtype
+            step = fill_constant([1], step_dtype, step, force_cpu=True)
+    else:
+        if in_dynamic_mode() and not paddle.isfinite(step):
+            raise ValueError(
+                f"The value of step must be finite, but received: {step}."
+            )
 
     if in_dynamic_or_pir_mode():
         tensor = _C_ops.arange(
@@ -2488,21 +2491,18 @@ def range(
 
     if not isinstance(start, (Variable, paddle.pir.Value)):
         with device_guard("cpu"):
-            start = fill_constant([1], dtype, start, force_cpu=True)
-    elif start.dtype != dtype:
-        start = paddle.cast(start, dtype)
+            start_dtype = np.array(start).dtype
+            start = fill_constant([1], start_dtype, start, force_cpu=True)
 
     if not isinstance(end, (Variable, paddle.pir.Value)):
         with device_guard("cpu"):
-            end = fill_constant([1], dtype, end, force_cpu=True)
-    elif end.dtype != dtype:
-        end = paddle.cast(end, dtype)
+            end_dtype = np.array(end).dtype
+            end = fill_constant([1], end_dtype, end, force_cpu=True)
 
     if not isinstance(step, (Variable, paddle.pir.Value)):
         with device_guard("cpu"):
-            step = fill_constant([1], dtype, step, force_cpu=True)
-    elif step.dtype != dtype:
-        step = paddle.cast(step, dtype)
+            step_dtype = np.array(step).dtype
+            step = fill_constant([1], step_dtype, step, force_cpu=True)
 
     tensor = _C_ops.range_v2(
         start,
diff --git a/test/legacy_test/test_range_and_arange.py b/test/legacy_test/test_range_and_arange.py
index 8013f7a1a63ae..0eb4ba74754d3 100644
--- a/test/legacy_test/test_range_and_arange.py
+++ b/test/legacy_test/test_range_and_arange.py
@@ -19,7 +19,8 @@
 from utils import dygraph_guard
 
 import paddle
-from paddle.static import InputSpec
+from paddle.base.layer_helper import LayerHelper
+from paddle.static import InputSpec, Program, program_guard
 
 
 class TestTensorCreation(unittest.TestCase):
@@ -128,6 +129,7 @@ def range_manual(start, end, step, dtype, device, requires_grad):
         ):
             with dygraph_guard():
                 for start, end, step in [
+                    (0, 0, 1),
                     (0, 5, 1),
                     (2, 7, 2),
                     (5, None, 1),
@@ -307,5 +309,84 @@ def test_range(self):
         self.assertEqual(t.stop_gradient, False)
 
 
+class TestRangeV2LegacyInferMeta(unittest.TestCase):
+    """
+    Check that RangeTensorInferMetaLegacy is reached via the legacy static graph path.
+    - TestTensorCreation.test_range (above) calls paddle.range() in
+      dynamic graph mode, which triggers RangeTensorInferMeta (with dtype param).
+    - No other test triggers RangeTensorInferMetaLegacy (no dtype param),
+      because paddle.range() has no old static graph fallback like
+      paddle.arange() does (which falls back to append_op(type='range')
+      → mapped to the 'arange' op, not 'range_v2').
+    - To trigger RangeTensorInferMetaLegacy, this test calls
+      append_op(type='range_v2') directly under static graph mode.
+    """
+
+    def range_manual(self, start, end, step, dtype):
+        size_ = int(np.abs(np.trunc((end - start) / step))) + 1  # element count, start included
+        out = np.empty([size_], dtype=dtype)
+        for i in range(size_):
+            out[i] = start + i * step
+        return out
+
+    def test_range_v2_legacy(self):
+        paddle.enable_static()
+        try:
+            test_cases = [  # (start, end, step) triples, including negative steps
+                (0, 5, 1),
+                (2, 7, 2),
+                (0, 1, 0.1),
+                (10, 1, -2),
+                (-1, -10, -2),
+            ]
+            for start_val, end_val, step_val in test_cases:
+                with (
+                    paddle.pir_utils.OldIrGuard(),  # presumably selects the legacy (non-PIR) IR; confirm
+                    program_guard(Program(), Program()),  # fresh programs for every case
+                ):
+                    start = paddle.static.data(
+                        name='start', shape=[1], dtype='float32'
+                    )
+                    end = paddle.static.data(
+                        name='end', shape=[1], dtype='float32'
+                    )
+                    step = paddle.static.data(
+                        name='step', shape=[1], dtype='float32'
+                    )
+                    # Append the 'range_v2' op by hand through LayerHelper.
+                    helper = LayerHelper('range_v2')
+                    out = helper.create_variable_for_type_inference(
+                        dtype='float32'
+                    )
+                    helper.append_op(
+                        type='range_v2',
+                        inputs={'Start': start, 'End': end, 'Step': step},
+                        outputs={'Out': out},
+                    )
+                    self.assertEqual(out.shape, (-1,))  # 1-D; -1 presumably marks a length unknown until run time
+                    # Execute on CPU and compare against the NumPy reference from range_manual.
+                    exe = paddle.static.Executor(paddle.CPUPlace())
+                    (result,) = exe.run(
+                        feed={
+                            'start': np.array([start_val], dtype='float32'),
+                            'end': np.array([end_val], dtype='float32'),
+                            'step': np.array([step_val], dtype='float32'),
+                        },
+                        fetch_list=[out],
+                    )
+                    expected = self.range_manual(
+                        start_val, end_val, step_val, 'float32'
+                    )
+                    np.testing.assert_allclose(
+                        result,
+                        expected,
+                        rtol=1e-6,
+                        atol=1e-6,
+                        err_msg=f"[FAILED] range_v2({start_val},{end_val},{step_val})",
+                    )
+        finally:
+            paddle.disable_static()  # runs even when an assertion above fails
+
+
 if __name__ == '__main__':
     unittest.main()