InfiniTensor
diff --git a/src/infiniccl/moore/infiniccl_moore.cc b/src/infiniccl/moore/infiniccl_moore.cc
Lines changed: 16 additions & 3 deletions
diff --git a/src/infinicore/nn/linear.cc b/src/infinicore/nn/linear.cc
Lines changed: 1 addition & 1 deletion
diff --git a/src/infiniop/ops/causal_softmax/operator.cc b/src/infiniop/ops/causal_softmax/operator.cc
Lines changed: 12 additions & 4 deletions
diff --git a/src/infiniop/ops/clip/operator.cc b/src/infiniop/ops/clip/operator.cc
Lines changed: 2 additions & 2 deletions
diff --git a/src/infiniop/ops/logsoftmax/operator.cc b/src/infiniop/ops/logsoftmax/operator.cc
Lines changed: 8 additions & 4 deletions
diff --git a/src/infiniop/ops/paged_caching/cuda/kernel.cuh b/src/infiniop/ops/paged_caching/cuda/kernel.cuh
Lines changed: 7 additions & 5 deletions
diff --git a/src/infiniop/ops/paged_caching/info.h b/src/infiniop/ops/paged_caching/info.h
Lines changed: 13 additions & 1 deletion
diff --git a/src/infiniop/ops/paged_caching/metax/paged_caching_metax.maca b/src/infiniop/ops/paged_caching/metax/paged_caching_metax.maca
Lines changed: 26 additions & 5 deletions
diff --git a/src/infiniop/ops/paged_caching/moore/paged_caching_moore.mu b/src/infiniop/ops/paged_caching/moore/paged_caching_moore.mu
Lines changed: 26 additions & 5 deletions
@@ -23,6 +23,12 @@ inline mcclDataType_t getMcclDtype(infiniDtype_t datatype) {
         return mcclFloat;
     case INFINI_DTYPE_F16:
         return mcclHalf;
+
+#if MARCH_TYPE == 310
+    case INFINI_DTYPE_BF16:
+        return mcclBfloat16;
+#endif
+
     default:
         std::abort();
         return mcclHalf;
@@ -83,9 +89,16 @@ infiniStatus_t allReduce(
     infinicclComm_t comm,
     infinirtStream_t stream) {
 
-    if (datatype != INFINI_DTYPE_F32 && datatype != INFINI_DTYPE_F16) {
-        return INFINI_STATUS_BAD_PARAM;
-    }
+#if MARCH_TYPE == 310
+    CHECK_DTYPE(datatype,
+                INFINI_DTYPE_F32,
+                INFINI_DTYPE_F16,
+                INFINI_DTYPE_BF16);
+#else
+    CHECK_DTYPE(datatype,
+                INFINI_DTYPE_F32,
+                INFINI_DTYPE_F16);
+#endif
 
     CHECK_MCCL(mcclAllReduce(sendbuf, recvbuf, count, getMcclDtype(datatype),
                              getMcclRedOp(op), getMcclComm(comm), getMusaStream(stream)));
 
@@ -131,7 +131,7 @@ Linear::Linear(size_t in_features, size_t out_features,
         this->register_parameter("qweight", weight_);
         weight_zeros_ = infinicore::nn::Parameter({out_features, in_features}, infinicore::DataType::I32, device);
         this->register_parameter("qzeros", weight_zeros_);
-        weight_scale_ = infinicore::nn::Parameter({out_features, in_features}, infinicore::DataType::F16, device);
+        weight_scale_ = infinicore::nn::Parameter({out_features, in_features}, dtype_, device);
         this->register_parameter("scales", weight_scale_);
         if (bias) {
             INFINICORE_NN_PARAMETER_INIT(bias, ({out_features}, dtype_, device));
 
@@ -72,8 +72,10 @@ __C infiniStatus_t infiniopCreateCausalSoftmaxDescriptor(
 #ifdef ENABLE_MOORE_API
         CREATE(INFINI_DEVICE_MOORE, moore)
 #endif
+    default:
+        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
     }
-    return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
+#undef CREATE
 }
 
 __C infiniStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmaxDescriptor_t desc, size_t *size) {
@@ -117,8 +119,10 @@ __C infiniStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmaxDe
 #ifdef ENABLE_MOORE_API
         GET(INFINI_DEVICE_MOORE, moore)
 #endif
+    default:
+        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
     }
-    return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
+#undef GET
 }
 
 __C infiniStatus_t infiniopCausalSoftmax(
@@ -167,8 +171,10 @@ __C infiniStatus_t infiniopCausalSoftmax(
 #ifdef ENABLE_MOORE_API
         CALCULATE(INFINI_DEVICE_MOORE, moore)
 #endif
+    default:
+        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
     }
-    return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
+#undef CALCULATE
 }
 
 __C infiniStatus_t infiniopDestroyCausalSoftmaxDescriptor(infiniopCausalSoftmaxDescriptor_t desc) {
@@ -212,6 +218,8 @@ __C infiniStatus_t infiniopDestroyCausalSoftmaxDescriptor(infiniopCausalSoftmaxD
 #ifdef ENABLE_MOORE_API
         DESTROY(INFINI_DEVICE_MOORE, moore)
 #endif
+    default:
+        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
     }
-    return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
+#undef DESTROY
 }
@@ -91,11 +91,11 @@ __C infiniStatus_t infiniopGetClipWorkspaceSize(infiniopClipDescriptor_t desc, s
 #ifdef ENABLE_KUNLUN_API
         GET(INFINI_DEVICE_KUNLUN, kunlun)
 #endif
+    default:
+        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
     }
 
 #undef GET
-
-    return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
 }
 
 __C infiniStatus_t infiniopClip(
 
@@ -51,8 +51,9 @@ __C infiniStatus_t infiniopCreateLogSoftmaxDescriptor(
 #ifdef ENABLE_ASCEND_API
         // CREATE(INFINI_DEVICE_ASCEND, ascend)
 #endif
+    default:
+        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
     }
-    return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
 }
 
 __C infiniStatus_t infiniopGetLogSoftmaxWorkspaceSize(infiniopLogSoftmaxDescriptor_t desc, size_t *size) {
@@ -84,8 +85,9 @@ __C infiniStatus_t infiniopGetLogSoftmaxWorkspaceSize(infiniopLogSoftmaxDescript
 #ifdef ENABLE_ASCEND_API
         // GET(INFINI_DEVICE_ASCEND, ascend)
 #endif
+    default:
+        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
     }
-    return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
 }
 
 __C infiniStatus_t infiniopLogSoftmax(
@@ -122,8 +124,9 @@ __C infiniStatus_t infiniopLogSoftmax(
 #ifdef ENABLE_ASCEND_API
         // CALCULATE(INFINI_DEVICE_ASCEND, ascend)
 #endif
+    default:
+        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
     }
-    return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
 }
 
 __C infiniStatus_t infiniopDestroyLogSoftmaxDescriptor(infiniopLogSoftmaxDescriptor_t desc) {
@@ -155,6 +158,7 @@ __C infiniStatus_t infiniopDestroyLogSoftmaxDescriptor(infiniopLogSoftmaxDescrip
 #ifdef ENABLE_ASCEND_API
         // DESTROY(INFINI_DEVICE_ASCEND, ascend)
 #endif
+    default:
+        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
     }
-    return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
 }
@@ -38,7 +38,11 @@ __device__ void pagedCachingKernel(
     const ptrdiff_t k_src_stride,         // Stride between tokens in the source K tensor
     const ptrdiff_t v_src_stride,         // Stride between tokens in the source V tensor
     const ptrdiff_t k_cache_block_stride, // Stride between blocks in the K cache pool
-    const ptrdiff_t v_cache_block_stride  // Stride between blocks in the V cache pool
+    const ptrdiff_t v_cache_block_stride, // Stride between blocks in the V cache pool
+    const ptrdiff_t k_cache_head_stride,  // Stride between heads in the K cache pool
+    const ptrdiff_t v_cache_head_stride,  // Stride between heads in the V cache pool
+    const ptrdiff_t k_cache_slot_stride,  // Stride between block slots in the K cache pool
+    const ptrdiff_t v_cache_slot_stride   // Stride between block slots in the V cache pool
 ) {
     //================================================================================
     // 1. Identify Work Unit & Calculate Addresses
@@ -66,13 +70,11 @@ __device__ void pagedCachingKernel(
 
-    // Destination pointer calculation assumes a [num_blocks, block_size, num_heads, head_size] layout.
-    // We point to the beginning of the memory region for this token's slot.
-    const ptrdiff_t cache_head_stride = block_size * head_size;
-
+    // Destination addressing is stride-driven: the block, head and slot offsets each use the stride
+    // passed in by the caller, so no fixed contiguous layout is assumed for those three dimensions.
     Tdata *k_cache_block_base_ptr = k_cache_ptr + physical_block_idx * k_cache_block_stride;
-    Tdata *k_dst_head_ptr = k_cache_block_base_ptr + head_idx * cache_head_stride + block_offset * head_size;
+    Tdata *k_dst_head_ptr = k_cache_block_base_ptr + head_idx * k_cache_head_stride + block_offset * k_cache_slot_stride;
 
     Tdata *v_cache_block_base_ptr = v_cache_ptr + physical_block_idx * v_cache_block_stride;
-    Tdata *v_dst_head_ptr = v_cache_block_base_ptr + head_idx * cache_head_stride + block_offset * head_size;
+    Tdata *v_dst_head_ptr = v_cache_block_base_ptr + head_idx * v_cache_head_stride + block_offset * v_cache_slot_stride;
 
     //================================================================================
     // 2. Perform Element-wise Data Copy (Safe, Non-Vectorized)
 
@@ -26,6 +26,10 @@ class PagedCachingInfo {
     ptrdiff_t v_src_stride;
     ptrdiff_t k_cache_block_stride;
     ptrdiff_t v_cache_block_stride;
+    ptrdiff_t k_cache_head_stride;
+    ptrdiff_t v_cache_head_stride;
+    ptrdiff_t k_cache_slot_stride;
+    ptrdiff_t v_cache_slot_stride;
 
     static utils::Result<PagedCachingInfo> create(
         infiniopTensorDescriptor_t k_cache_desc,
@@ -63,6 +67,10 @@ class PagedCachingInfo {
         ptrdiff_t v_src_stride = v_desc->stride(0);
         ptrdiff_t k_cache_block_stride = k_cache_desc->stride(0);
         ptrdiff_t v_cache_block_stride = v_cache_desc->stride(0);
+        ptrdiff_t k_cache_head_stride = k_cache_desc->stride(1);
+        ptrdiff_t v_cache_head_stride = v_cache_desc->stride(1);
+        ptrdiff_t k_cache_slot_stride = k_cache_desc->stride(2);
+        ptrdiff_t v_cache_slot_stride = v_cache_desc->stride(2);
 
         return utils::Result<PagedCachingInfo>(PagedCachingInfo{
             dtype,
@@ -73,7 +81,11 @@ class PagedCachingInfo {
             k_src_stride,
             v_src_stride,
             k_cache_block_stride,
-            v_cache_block_stride});
+            v_cache_block_stride,
+            k_cache_head_stride,
+            v_cache_head_stride,
+            k_cache_slot_stride,
+            v_cache_slot_stride});
     }
 };
 
 
@@ -10,10 +10,13 @@ INFINIOP_METAX_KERNEL pagedCaching(
     const int64_t *slot_mapping,
     const size_t head_size, const size_t block_size,
     const ptrdiff_t k_src_stride, const ptrdiff_t v_src_stride,
-    const ptrdiff_t k_cache_block_stride, const ptrdiff_t v_cache_block_stride) {
+    const ptrdiff_t k_cache_block_stride, const ptrdiff_t v_cache_block_stride,
+    const ptrdiff_t k_cache_head_stride, const ptrdiff_t v_cache_head_stride,
+    const ptrdiff_t k_cache_slot_stride, const ptrdiff_t v_cache_slot_stride) {
     op::paged_caching::cuda::pagedCachingKernel<Tdata, NUM_THREADS>(
         k_cache, v_cache, k, v, slot_mapping, head_size,
-        block_size, k_src_stride, v_src_stride, k_cache_block_stride, v_cache_block_stride);
+        block_size, k_src_stride, v_src_stride, k_cache_block_stride, v_cache_block_stride,
+        k_cache_head_stride, v_cache_head_stride, k_cache_slot_stride, v_cache_slot_stride);
 }
 
 namespace op::paged_caching::metax {
@@ -59,6 +62,8 @@ infiniStatus_t launchKernel(const PagedCachingInfo &info,
                             size_t num_tokens, size_t num_kv_heads, size_t head_size, size_t block_size,
                             ptrdiff_t k_src_stride, ptrdiff_t v_src_stride,
                             ptrdiff_t k_cache_block_stride, ptrdiff_t v_cache_block_stride,
+                            ptrdiff_t k_cache_head_stride, ptrdiff_t v_cache_head_stride,
+                            ptrdiff_t k_cache_slot_stride, ptrdiff_t v_cache_slot_stride,
                             hcStream_t stream) {
 
     // Grid dimension is 1D, with one block per token, as we decided.
@@ -83,7 +88,11 @@ infiniStatus_t launchKernel(const PagedCachingInfo &info,
                 k_src_stride,
                 v_src_stride,
                 k_cache_block_stride,
-                v_cache_block_stride);
+                v_cache_block_stride,
+                k_cache_head_stride,
+                v_cache_head_stride,
+                k_cache_slot_stride,
+                v_cache_slot_stride);
     } else if (dtype == INFINI_DTYPE_BF16) {
         pagedCaching<cuda_bfloat16, NUM_THREADS>
             <<<grid, block, shared_mem_size, stream>>>(
@@ -97,7 +106,11 @@ infiniStatus_t launchKernel(const PagedCachingInfo &info,
                 k_src_stride,
                 v_src_stride,
                 k_cache_block_stride,
-                v_cache_block_stride);
+                v_cache_block_stride,
+                k_cache_head_stride,
+                v_cache_head_stride,
+                k_cache_slot_stride,
+                v_cache_slot_stride);
     } else if (dtype == INFINI_DTYPE_F32) {
         pagedCaching<float, NUM_THREADS>
             <<<grid, block, shared_mem_size, stream>>>(
@@ -111,7 +124,11 @@ infiniStatus_t launchKernel(const PagedCachingInfo &info,
                 k_src_stride,
                 v_src_stride,
                 k_cache_block_stride,
-                v_cache_block_stride);
+                v_cache_block_stride,
+                k_cache_head_stride,
+                v_cache_head_stride,
+                k_cache_slot_stride,
+                v_cache_slot_stride);
     } else {
         return INFINI_STATUS_BAD_TENSOR_DTYPE;
     }
@@ -138,13 +155,17 @@ infiniStatus_t Descriptor::calculate(
             _info.num_tokens, _info.num_kv_heads, _info.head_size, _info.block_size,
             _info.k_src_stride, _info.v_src_stride,
             _info.k_cache_block_stride, _info.v_cache_block_stride,
+            _info.k_cache_head_stride, _info.v_cache_head_stride,
+            _info.k_cache_slot_stride, _info.v_cache_slot_stride,
             stream);
     } else if (max_threads >= METAX_BLOCK_SIZE_512) {
         launchKernel<METAX_BLOCK_SIZE_512>(
             _info, k_cache, v_cache, _info.dtype, k, v, slot_mapping,
             _info.num_tokens, _info.num_kv_heads, _info.head_size, _info.block_size,
             _info.k_src_stride, _info.v_src_stride,
             _info.k_cache_block_stride, _info.v_cache_block_stride,
+            _info.k_cache_head_stride, _info.v_cache_head_stride,
+            _info.k_cache_slot_stride, _info.v_cache_slot_stride,
             stream);
     } else {
         // If the device supports fewer threads, return an error.
 
@@ -10,10 +10,13 @@ INFINIOP_MOORE_KERNEL pagedCaching(
     const int64_t *slot_mapping,
     const size_t head_size, const size_t block_size,
     const ptrdiff_t k_src_stride, const ptrdiff_t v_src_stride,
-    const ptrdiff_t k_cache_block_stride, const ptrdiff_t v_cache_block_stride) {
+    const ptrdiff_t k_cache_block_stride, const ptrdiff_t v_cache_block_stride,
+    const ptrdiff_t k_cache_head_stride, const ptrdiff_t v_cache_head_stride,
+    const ptrdiff_t k_cache_slot_stride, const ptrdiff_t v_cache_slot_stride) {
     op::paged_caching::cuda::pagedCachingKernel<Tdata, NUM_THREADS>(
         k_cache, v_cache, k, v, slot_mapping, head_size,
-        block_size, k_src_stride, v_src_stride, k_cache_block_stride, v_cache_block_stride);
+        block_size, k_src_stride, v_src_stride, k_cache_block_stride, v_cache_block_stride,
+        k_cache_head_stride, v_cache_head_stride, k_cache_slot_stride, v_cache_slot_stride);
 }
 
 namespace op::paged_caching::moore {
@@ -59,6 +62,8 @@ infiniStatus_t launchKernel(const PagedCachingInfo &info,
                             size_t num_tokens, size_t num_kv_heads, size_t head_size, size_t block_size,
                             ptrdiff_t k_src_stride, ptrdiff_t v_src_stride,
                             ptrdiff_t k_cache_block_stride, ptrdiff_t v_cache_block_stride,
+                            ptrdiff_t k_cache_head_stride, ptrdiff_t v_cache_head_stride,
+                            ptrdiff_t k_cache_slot_stride, ptrdiff_t v_cache_slot_stride,
                             musaStream_t stream) {
 
     // Grid dimension is 1D, with one block per token, as we decided.
@@ -83,7 +88,11 @@ infiniStatus_t launchKernel(const PagedCachingInfo &info,
                 k_src_stride,
                 v_src_stride,
                 k_cache_block_stride,
-                v_cache_block_stride);
+                v_cache_block_stride,
+                k_cache_head_stride,
+                v_cache_head_stride,
+                k_cache_slot_stride,
+                v_cache_slot_stride);
     } else if (dtype == INFINI_DTYPE_BF16) {
         pagedCaching<__mt_bfloat16, NUM_THREADS>
             <<<grid, block, shared_mem_size, stream>>>(
@@ -97,7 +106,11 @@ infiniStatus_t launchKernel(const PagedCachingInfo &info,
                 k_src_stride,
                 v_src_stride,
                 k_cache_block_stride,
-                v_cache_block_stride);
+                v_cache_block_stride,
+                k_cache_head_stride,
+                v_cache_head_stride,
+                k_cache_slot_stride,
+                v_cache_slot_stride);
     } else if (dtype == INFINI_DTYPE_F32) {
         pagedCaching<float, NUM_THREADS>
             <<<grid, block, shared_mem_size, stream>>>(
@@ -111,7 +124,11 @@ infiniStatus_t launchKernel(const PagedCachingInfo &info,
                 k_src_stride,
                 v_src_stride,
                 k_cache_block_stride,
-                v_cache_block_stride);
+                v_cache_block_stride,
+                k_cache_head_stride,
+                v_cache_head_stride,
+                k_cache_slot_stride,
+                v_cache_slot_stride);
     } else {
         return INFINI_STATUS_BAD_TENSOR_DTYPE;
     }
@@ -137,13 +154,17 @@ infiniStatus_t Descriptor::calculate(
             _info.num_tokens, _info.num_kv_heads, _info.head_size, _info.block_size,
             _info.k_src_stride, _info.v_src_stride,
             _info.k_cache_block_stride, _info.v_cache_block_stride,
+            _info.k_cache_head_stride, _info.v_cache_head_stride,
+            _info.k_cache_slot_stride, _info.v_cache_slot_stride,
             stream);
     } else if (_opaque->internal->maxThreadsPerBlock() >= MOORE_BLOCK_SIZE_512) {
         launchKernel<MOORE_BLOCK_SIZE_512>(
             _info, k_cache, v_cache, _info.dtype, k, v, slot_mapping,
             _info.num_tokens, _info.num_kv_heads, _info.head_size, _info.block_size,
             _info.k_src_stride, _info.v_src_stride,
             _info.k_cache_block_stride, _info.v_cache_block_stride,
+            _info.k_cache_head_stride, _info.v_cache_head_stride,
+            _info.k_cache_slot_stride, _info.v_cache_slot_stride,
             stream);
     } else {
         // If the GPU is older and supports fewer threads, return an error.
Original file line number	Diff line number	Diff line change
`@@ -72,8 +72,10 @@ __C infiniStatus_t infiniopCreateCausalSoftmaxDescriptor(`
`72`	`72`	`#ifdef ENABLE_MOORE_API`
`73`	`73`	`CREATE(INFINI_DEVICE_MOORE, moore)`
`74`	`74`	`#endif`
	`75`	`+ default:`
	`76`	`+ return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;`
`75`	`77`	`}`
`76`		`- return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;`
	`78`	`+#undef CREATE`
`77`	`79`	`}`
`78`	`80`
`79`	`81`	`__C infiniStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmaxDescriptor_t desc, size_t *size) {`
`@@ -117,8 +119,10 @@ __C infiniStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmaxDe`
`117`	`119`	`#ifdef ENABLE_MOORE_API`
`118`	`120`	`GET(INFINI_DEVICE_MOORE, moore)`
`119`	`121`	`#endif`
	`122`	`+ default:`
	`123`	`+ return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;`
`120`	`124`	`}`
`121`		`- return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;`
	`125`	`+#undef GET`
`122`	`126`	`}`
`123`	`127`
`124`	`128`	`__C infiniStatus_t infiniopCausalSoftmax(`
`@@ -167,8 +171,10 @@ __C infiniStatus_t infiniopCausalSoftmax(`
`167`	`171`	`#ifdef ENABLE_MOORE_API`
`168`	`172`	`CALCULATE(INFINI_DEVICE_MOORE, moore)`
`169`	`173`	`#endif`
	`174`	`+ default:`
	`175`	`+ return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;`
`170`	`176`	`}`
`171`		`- return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;`
	`177`	`+#undef CALCULATE`
`172`	`178`	`}`
`173`	`179`
`174`	`180`	`__C infiniStatus_t infiniopDestroyCausalSoftmaxDescriptor(infiniopCausalSoftmaxDescriptor_t desc) {`
`@@ -212,6 +218,8 @@ __C infiniStatus_t infiniopDestroyCausalSoftmaxDescriptor(infiniopCausalSoftmaxD`
`212`	`218`	`#ifdef ENABLE_MOORE_API`
`213`	`219`	`DESTROY(INFINI_DEVICE_MOORE, moore)`
`214`	`220`	`#endif`
	`221`	`+ default:`
	`222`	`+ return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;`
`215`	`223`	`}`
`216`		`- return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;`
	`224`	`+#undef DESTROY`
`217`	`225`	`}`
Original file line number	Diff line number	Diff line change
`@@ -91,11 +91,11 @@ __C infiniStatus_t infiniopGetClipWorkspaceSize(infiniopClipDescriptor_t desc, s`
`91`	`91`	`#ifdef ENABLE_KUNLUN_API`
`92`	`92`	`GET(INFINI_DEVICE_KUNLUN, kunlun)`
`93`	`93`	`#endif`
	`94`	`+ default:`
	`95`	`+ return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;`
`94`	`96`	`}`
`95`	`97`
`96`	`98`	`#undef GET`
`97`		`-`
`98`		`- return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;`
`99`	`99`	`}`
`100`	`100`
`101`	`101`	`__C infiniStatus_t infiniopClip(`
Original file line number	Diff line number	Diff line change
`@@ -51,8 +51,9 @@ __C infiniStatus_t infiniopCreateLogSoftmaxDescriptor(`
`51`	`51`	`#ifdef ENABLE_ASCEND_API`
`52`	`52`	`// CREATE(INFINI_DEVICE_ASCEND, ascend)`
`53`	`53`	`#endif`
	`54`	`+ default:`
	`55`	`+ return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;`
`54`	`56`	`}`
`55`		`- return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;`
`56`	`57`	`}`
`57`	`58`
`58`	`59`	`__C infiniStatus_t infiniopGetLogSoftmaxWorkspaceSize(infiniopLogSoftmaxDescriptor_t desc, size_t *size) {`
`@@ -84,8 +85,9 @@ __C infiniStatus_t infiniopGetLogSoftmaxWorkspaceSize(infiniopLogSoftmaxDescript`
`84`	`85`	`#ifdef ENABLE_ASCEND_API`
`85`	`86`	`// GET(INFINI_DEVICE_ASCEND, ascend)`
`86`	`87`	`#endif`
	`88`	`+ default:`
	`89`	`+ return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;`
`87`	`90`	`}`
`88`		`- return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;`
`89`	`91`	`}`
`90`	`92`
`91`	`93`	`__C infiniStatus_t infiniopLogSoftmax(`
`@@ -122,8 +124,9 @@ __C infiniStatus_t infiniopLogSoftmax(`
`122`	`124`	`#ifdef ENABLE_ASCEND_API`
`123`	`125`	`// CALCULATE(INFINI_DEVICE_ASCEND, ascend)`
`124`	`126`	`#endif`
	`127`	`+ default:`
	`128`	`+ return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;`
`125`	`129`	`}`
`126`		`- return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;`
`127`	`130`	`}`
`128`	`131`
`129`	`132`	`__C infiniStatus_t infiniopDestroyLogSoftmaxDescriptor(infiniopLogSoftmaxDescriptor_t desc) {`
`@@ -155,6 +158,7 @@ __C infiniStatus_t infiniopDestroyLogSoftmaxDescriptor(infiniopLogSoftmaxDescrip`
`155`	`158`	`#ifdef ENABLE_ASCEND_API`
`156`	`159`	`// DESTROY(INFINI_DEVICE_ASCEND, ascend)`
`157`	`160`	`#endif`
	`161`	`+ default:`
	`162`	`+ return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;`
`158`	`163`	`}`
`159`		`- return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;`
`160`	`164`	`}`