refactor(ascend): address PR #64 review — clean headers, Markdown in TORCH_CHECK, Google C++ naming

zhangyue · zhangyue · commit 720234d29f65 · 2026-04-21T14:17:32.000+08:00
- `workspace_pool_.h`: uncomment `<cinttypes>` / `<cstdio>` (needed for `PRIu64` and `fprintf` in the destructor; not transitively available on all platforms).
- `device_.h`: include `device.h` directly (resolved through the include path) instead of pulling it in transitively via `data_type.h` — the historical `src/ascend/device.h` naming collision that forced the workaround is no longer relevant.
- `custom/{add_rms_norm,rms_norm}/op_host/*.cpp`: drop unneeded BSD-3-Clause headers and switch `TORCH_CHECK` messages to Markdown-backticked identifiers.
- `custom/{add_rms_norm,rms_norm}/op_kernel/*.cpp`: drop unneeded BSD-3-Clause headers.
- Rename wrapper functions to PascalCase per Google C++ Style: `add_rms_norm` → `AddRmsNorm`, `rms_norm` → `RmsNorm` (`ops.h` and `torch_binding.cpp` updated accordingly; the `torch.ops.npu.rms_norm` registry name is unchanged; kernel entry-point names stay snake_case as required by `EXEC_KERNEL_CMD`).
diff --git a/src/ascend/custom/add_rms_norm/op_host/add_rms_norm.cpp b/src/ascend/custom/add_rms_norm/op_host/add_rms_norm.cpp
@@ -1,31 +1,26 @@
-/*
- * Copyright (c) 2025, InfiniTensor.
- * All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
 #include "aclrtlaunch_add_rms_norm.h"
 #include "tiling/platform/platform_ascendc.h"
 #include "torch_kernel_helper.h"
 
 namespace ascend::detail {
 
-std::vector<at::Tensor> add_rms_norm(const at::Tensor& x1, const at::Tensor& x2,
-                                     const at::Tensor& weight, double eps) {
+std::vector<at::Tensor> AddRmsNorm(const at::Tensor& x1, const at::Tensor& x2,
+                                   const at::Tensor& weight, double eps) {
   // Input validation.
-  TORCH_CHECK(x1.dim() > 0, "add_rms_norm: x1 must have at least 1 dimension");
+  TORCH_CHECK(x1.dim() > 0,
+              "`AddRmsNorm`: `x1` must have at least 1 dimension.");
   TORCH_CHECK(x1.sizes() == x2.sizes(),
-              "add_rms_norm: x1 and x2 must have the same shape");
+              "`AddRmsNorm`: `x1` and `x2` must have the same shape.");
   TORCH_CHECK(x1.scalar_type() == x2.scalar_type(),
-              "add_rms_norm: x1 and x2 must have the same dtype");
+              "`AddRmsNorm`: `x1` and `x2` must have the same dtype.");
   TORCH_CHECK(x1.scalar_type() == at::kHalf || x1.scalar_type() == at::kFloat,
-              "add_rms_norm: only float16 and float32 are supported, got ",
-              x1.scalar_type());
-  TORCH_CHECK(weight.dim() == 1, "add_rms_norm: weight must be 1-dimensional");
-  TORCH_CHECK(weight.size(0) == x1.size(-1), "add_rms_norm: weight size (",
+              "`AddRmsNorm`: only `float16` and `float32` are supported; got ",
+              x1.scalar_type(), ".");
+  TORCH_CHECK(weight.dim() == 1,
+              "`AddRmsNorm`: `weight` must be 1-dimensional.");
+  TORCH_CHECK(weight.size(0) == x1.size(-1), "`AddRmsNorm`: `weight` size (",
               weight.size(0), ") must match input last dim (", x1.size(-1),
-              ")");
+              ").");
 
   int64_t dim_length = x1.size(-1);
   int64_t total_rows = x1.numel() / dim_length;
@@ -62,9 +57,10 @@ std::vector<at::Tensor> add_rms_norm(const at::Tensor& x1, const at::Tensor& x2,
   int64_t max_dim_length = (ub_size_limit - 1024) / buffer_coefficient;
   int64_t fp_align_elements = 32 / 4;
   max_dim_length = (max_dim_length / fp_align_elements) * fp_align_elements;
-  TORCH_CHECK(dim_length_align <= max_dim_length, "add_rms_norm: dim_length ",
-              dim_length, " (aligned ", dim_length_align,
-              ") exceeds UB capacity (max ", max_dim_length, ")");
+  TORCH_CHECK(dim_length_align <= max_dim_length,
+              "`AddRmsNorm`: `dim_length` ", dim_length, " (aligned ",
+              dim_length_align, ") exceeds UB capacity (max ", max_dim_length,
+              ").");
 
   // Padding.
   at::Tensor kernel_input1;
@@ -109,6 +105,12 @@ std::vector<at::Tensor> add_rms_norm(const at::Tensor& x1, const at::Tensor& x2,
   float eps_float = static_cast<float>(eps);
   int64_t dtype_size_val = dtype_size;
 
+  // The first arg `add_rms_norm` is the AscendC kernel entry-point name — it
+  // must match `ascendc_add_operator(OP_NAME add_rms_norm)` in `CMakeLists.txt`,
+  // the `__global__ __aicore__ void add_rms_norm(...)` definition in
+  // `op_kernel/`, and the generated `aclrtlaunch_add_rms_norm.h` header.
+  // Google C++ Style's PascalCase rule does NOT apply: this identifier is
+  // dictated by the AscendC toolchain's symbol convention.
   EXEC_KERNEL_CMD(add_rms_norm, block_dim, kernel_input1, kernel_input2,
                   weight_float, kernel_output_y, kernel_output_x_out,
                   total_rows, dim_length, dim_length_align, former_num,
diff --git a/src/ascend/custom/add_rms_norm/op_kernel/add_rms_norm.cpp b/src/ascend/custom/add_rms_norm/op_kernel/add_rms_norm.cpp
@@ -1,10 +1,3 @@
-/*
- * Copyright (c) 2025, InfiniTensor.
- * All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
 #include "kernel_operator.h"
 
 constexpr int32_t BUFFER_NUM = 2;
diff --git a/src/ascend/custom/ops.h b/src/ascend/custom/ops.h
@@ -20,8 +20,8 @@
 
 namespace ascend::detail {
 
-at::Tensor rms_norm(const at::Tensor& input, const at::Tensor& weight,
-                    double eps);
+at::Tensor RmsNorm(const at::Tensor& input, const at::Tensor& weight,
+                   double eps);
 
 }  // namespace ascend::detail
 
diff --git a/src/ascend/custom/rms_norm/op_host/rms_norm.cpp b/src/ascend/custom/rms_norm/op_host/rms_norm.cpp
@@ -1,29 +1,22 @@
-/*
- * Copyright (c) 2025, InfiniTensor.
- * All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
 #include "aclrtlaunch_rms_norm.h"
 #include "tiling/platform/platform_ascendc.h"
 #include "torch_kernel_helper.h"
 
 namespace ascend::detail {
 
-at::Tensor rms_norm(const at::Tensor& input, const at::Tensor& weight,
-                    double eps) {
+at::Tensor RmsNorm(const at::Tensor& input, const at::Tensor& weight,
+                   double eps) {
   // Input validation.
   TORCH_CHECK(input.dim() > 0,
-              "rms_norm: input must have at least 1 dimension");
+              "`RmsNorm`: `input` must have at least 1 dimension.");
   TORCH_CHECK(
       input.scalar_type() == at::kHalf || input.scalar_type() == at::kFloat,
-      "rms_norm: only float16 and float32 are supported, got ",
-      input.scalar_type());
-  TORCH_CHECK(weight.dim() == 1, "rms_norm: weight must be 1-dimensional");
-  TORCH_CHECK(weight.size(0) == input.size(-1), "rms_norm: weight size (",
+      "`RmsNorm`: only `float16` and `float32` are supported; got ",
+      input.scalar_type(), ".");
+  TORCH_CHECK(weight.dim() == 1, "`RmsNorm`: `weight` must be 1-dimensional.");
+  TORCH_CHECK(weight.size(0) == input.size(-1), "`RmsNorm`: `weight` size (",
               weight.size(0), ") must match input last dim (", input.size(-1),
-              ")");
+              ").");
 
   int64_t dim_length = input.size(-1);
   int64_t total_rows = input.numel() / dim_length;
@@ -61,9 +54,10 @@ at::Tensor rms_norm(const at::Tensor& input, const at::Tensor& weight,
   // `fp32` alignment.
   int64_t fp_align_elements = 32 / 4;
   max_dim_length = (max_dim_length / fp_align_elements) * fp_align_elements;
-  TORCH_CHECK(dim_length_align <= max_dim_length, "rms_norm: dim_length ",
-              dim_length, " (aligned ", dim_length_align,
-              ") exceeds UB capacity (max ", max_dim_length, ")");
+  TORCH_CHECK(dim_length_align <= max_dim_length,
+              "`RmsNorm`: `dim_length` ", dim_length, " (aligned ",
+              dim_length_align, ") exceeds UB capacity (max ", max_dim_length,
+              ").");
 
   // Padding.
   at::Tensor kernel_input;
@@ -100,6 +94,12 @@ at::Tensor rms_norm(const at::Tensor& input, const at::Tensor& weight,
   float eps_float = static_cast<float>(eps);
   int64_t dtype_size_val = dtype_size;
 
+  // The first arg `rms_norm` is the AscendC kernel entry-point name — it
+  // must match `ascendc_add_operator(OP_NAME rms_norm)` in `CMakeLists.txt`,
+  // the `__global__ __aicore__ void rms_norm(...)` definition in `op_kernel/`,
+  // and the generated `aclrtlaunch_rms_norm.h` header.  Google C++ Style's
+  // PascalCase rule does NOT apply: this identifier is dictated by the
+  // AscendC toolchain's symbol convention.
   EXEC_KERNEL_CMD(rms_norm, block_dim, kernel_input, weight_float,
                   kernel_output, total_rows, dim_length, dim_length_align,
                   former_num, former_length, tail_length, eps_float,
diff --git a/src/ascend/custom/rms_norm/op_kernel/rms_norm.cpp b/src/ascend/custom/rms_norm/op_kernel/rms_norm.cpp
@@ -1,10 +1,3 @@
-/*
- * Copyright (c) 2025, InfiniTensor.
- * All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
 #include "kernel_operator.h"
 
 constexpr int32_t BUFFER_NUM = 2;
diff --git a/src/ascend/custom/torch_binding.cpp b/src/ascend/custom/torch_binding.cpp
@@ -26,6 +26,6 @@ TORCH_LIBRARY_FRAGMENT(npu, m) {
 }
 
 TORCH_LIBRARY_IMPL(npu, PrivateUse1, m) {
-  m.impl("rms_norm", TORCH_FN(ascend::detail::rms_norm));
+  m.impl("rms_norm", TORCH_FN(ascend::detail::RmsNorm));
 }
 }  // namespace
diff --git a/src/ascend/device_.h b/src/ascend/device_.h
@@ -1,10 +1,7 @@
 #ifndef INFINI_OPS_ASCEND_DEVICE__H_
 #define INFINI_OPS_ASCEND_DEVICE__H_
 
-// NOTE: Cannot use `#include "device.h"` here — GCC resolves quoted includes
-// relative to the current file first, and `src/ascend/` used to contain a
-// `device.h`.  Use `data_type.h` which transitively pulls in `src/device.h`.
-#include "data_type.h"
+#include "device.h"
 
 namespace infini::ops {
 

Original file line number	Diff line number	Diff line change
`@@ -26,6 +26,6 @@ TORCH_LIBRARY_FRAGMENT(npu, m) {`
`26`	`26`	`}`
`27`	`27`
`28`	`28`	`TORCH_LIBRARY_IMPL(npu, PrivateUse1, m) {`
`29`		`- m.impl("rms_norm", TORCH_FN(ascend::detail::rms_norm));`
	`29`	`+ m.impl("rms_norm", TORCH_FN(ascend::detail::RmsNorm));`
`30`	`30`	`}`
`31`	`31`	`} // namespace`