Skip to content

Commit b32dcb5

Browse files
issue/1135 fix layernorm kernel
1 parent e3dee1b commit b32dcb5

File tree

13 files changed

+713
-113
lines changed

13 files changed

+713
-113
lines changed
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#pragma once

#include <cstddef> // size_t (used by the constructor and normalized_shape_)
#include <string>  // std::string (returned by extra_repr)

#include "../ops.hpp"
#include "module.hpp"

namespace infinicore::nn {

/// Layer Normalization module: normalizes the last dimension of the input
/// and (optionally) applies a learnable elementwise affine transform.
class LayerNorm : public Module {
public:
    /**
     * @brief Construct a LayerNorm layer
     *
     * @param normalized_shape Size of the feature dimension to normalize (typically hidden_size)
     * @param eps Small constant for numerical stability (default: 1e-6)
     * @param dtype Data type for the weight (default: DataType::F32)
     * @param device Device to create the weight on
     * @param elementwise_affine Whether to include learnable affine weight and bias parameters (default: true)
     *
     * NOTE(review): the default eps here (1e-6) differs from the op-level
     * default (1e-5f) in ops/layer_norm.hpp and from torch.nn.LayerNorm's
     * 1e-5 — confirm the mismatch is intentional.
     */
    LayerNorm(size_t normalized_shape,
              double eps = 1e-6,
              const DataType &dtype = DataType::F32,
              const Device &device = Device(),
              bool elementwise_affine = true);

    /**
     * @brief Forward pass: apply LayerNorm
     *
     * @param x Input tensor of shape (*, normalized_shape) where * is any number of dimensions
     * @return Normalized tensor with same shape as input
     *
     * The normalization is applied over the last dimension.
     * For example:
     *   Input: [batch, seq_len, hidden_size] -> normalize over hidden_size
     *   Input: [batch, hidden_size]          -> normalize over hidden_size
     */
    Tensor forward(const Tensor &x) const;

    /// Epsilon used for numerical stability.
    double eps() const { return eps_; }
    /// Data type of the affine parameters.
    DataType dtype() const { return dtype_; }

    // String representation
    std::string extra_repr() const;

    // Accessors for parameters
    Tensor weight() const { return weight_; }
    Tensor bias() const { return bias_; }

protected:
    // Parameters (declared/registered via the module parameter macro)
    INFINICORE_NN_PARAMETER(weight);
    INFINICORE_NN_PARAMETER(bias);

private:
    size_t normalized_shape_; // Size of the feature dimension
    double eps_;              // Epsilon for numerical stability
    DataType dtype_;          // Data type for weight
    bool elementwise_affine_; // Whether to use learnable affine parameters
};

} // namespace infinicore::nn
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#pragma once

#include "../device.hpp"
#include "../graph/graph.hpp"
#include "common/op.hpp"

namespace infinicore::op {

// Graph-op class for LayerNorm. Parameter order matches the kernel:
// three outputs (y, standardization, std_deviation), three inputs
// (x, weight, bias), plus the epsilon scalar.
INFINICORE_GRAPH_OP_CLASS(LayerNorm, Tensor, Tensor, Tensor, const Tensor &, const Tensor &, const Tensor &, float);

// Out-of-place: allocates and returns y with x's shape/dtype/device.
Tensor layer_norm(const Tensor &x, const Tensor &weight, const Tensor &bias, float epsilon = 1e-5f);
// In-place, with caller-provided intermediate tensors (standardization has
// x's shape; std_deviation has x's shape minus the last dimension).
void layer_norm_(Tensor y, Tensor standardization, Tensor std_deviation, const Tensor &x, const Tensor &weight, const Tensor &bias, float epsilon = 1e-5f);
// In-place, allocating the intermediates internally.
void layer_norm_(Tensor y, const Tensor &x, const Tensor &weight, const Tensor &bias, float epsilon = 1e-5f);
// Unambiguous wrapper for the pybind11 binding (avoids overload resolution
// between the two layer_norm_ signatures).
void layer_norm_for_pybind(Tensor y, const Tensor &x, const Tensor &weight, const Tensor &bias, float epsilon = 1e-5f);

} // namespace infinicore::op

python/infinicore/nn/functional/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from .hinge_embedding_loss import hinge_embedding_loss
1414
from .huber_loss import huber_loss
1515
from .interpolate import interpolate
16+
from .layer_norm import layer_norm
1617
from .linear import linear
1718
from .linear_w8a8i8 import linear_w8a8i8
1819
from .log_softmax import log_softmax
@@ -83,4 +84,5 @@
8384
"softplus",
8485
"softsign",
8586
"huber_loss",
87+
"layer_norm",
8688
]
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from typing import List
2+
3+
from infinicore.lib import _infinicore
4+
from infinicore.tensor import Tensor
5+
6+
7+
def layer_norm(
    input: Tensor,
    normalized_shape: List[int],
    weight: Tensor,
    bias: Tensor,
    eps: float = 1e-5,
    *,
    out=None,
) -> Tensor:
    r"""Apply Layer Normalization.

    Args:
        input: Input tensor of shape ``(*, normalized_shape)``.
        normalized_shape: Shape of the trailing dimension(s) to normalize;
            must match ``weight.shape``.
        weight: Scale parameter.
        bias: Shift parameter; must have the same shape as ``weight``.
        eps: Small constant added for numerical stability.
        out: Optional pre-allocated output tensor; written in place when given.

    Returns:
        The normalized tensor (``out`` when provided, otherwise a new tensor).
    """

    # Coerce both sides to list so tuple vs. list sequences compare equal.
    assert list(normalized_shape) == list(weight.shape), (
        "normalized_shape does not match weight.shape."
    )
    # The kernel applies weight and bias elementwise over the same dimension,
    # so their shapes must agree as well (previously unchecked).
    assert list(bias.shape) == list(weight.shape), (
        "bias.shape does not match weight.shape."
    )

    if out is None:
        return Tensor(
            _infinicore.layer_norm(
                input._underlying, weight._underlying, bias._underlying, eps
            )
        )

    _infinicore.layer_norm_(
        out._underlying, input._underlying, weight._underlying, bias._underlying, eps
    )

    return out
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#include "infinicore/ops/layer_norm.hpp"
2+
#include "../../utils.hpp"
3+
4+
namespace infinicore::op {
5+
INFINICORE_GRAPH_OP_DISPATCHERS_IMPL(LayerNorm);
6+
7+
LayerNorm::LayerNorm(Tensor y, Tensor standardization, Tensor std_deviation, const Tensor &x, const Tensor &weight, const Tensor &bias, float epsilon) {
    // Validate that every tensor operand lives on the same device before
    // dispatching. Includes bias, which the original check omitted even
    // though bias is passed to the kernel.
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(y, standardization, std_deviation, x, weight, bias);
    INFINICORE_GRAPH_OP_DISPATCH(y->device().getType(), y, standardization, std_deviation, x, weight, bias, epsilon);
}
11+
12+
// Entry point used by all public wrappers: the macro presumably either
// records the op into an active graph or runs it eagerly — confirm against
// INFINICORE_GRAPH_OP_RECORD_OR_RUN's definition.
void LayerNorm::execute(Tensor y, Tensor standardization, Tensor std_deviation, const Tensor &x, const Tensor &weight, const Tensor &bias, float epsilon) {
    INFINICORE_GRAPH_OP_RECORD_OR_RUN(LayerNorm, y, standardization, std_deviation, x, weight, bias, epsilon);
}
15+
16+
// Out-of-place LayerNorm: allocates the output, then delegates to the
// in-place overload so the scratch-tensor setup (standardization /
// std_deviation) lives in exactly one place instead of being duplicated here.
Tensor layer_norm(const Tensor &x, const Tensor &weight, const Tensor &bias, float epsilon) {
    auto y = Tensor::empty(x->shape(), x->dtype(), x->device());
    layer_norm_(y, x, weight, bias, epsilon);
    return y;
}
25+
26+
// In-place LayerNorm with caller-provided intermediate tensors; thin
// forwarder to the graph-op entry point.
void layer_norm_(Tensor y, Tensor standardization, Tensor std_deviation, const Tensor &x, const Tensor &weight, const Tensor &bias, float epsilon) {
    LayerNorm::execute(y, standardization, std_deviation, x, weight, bias, epsilon);
}
29+
30+
// In-place LayerNorm that allocates its own intermediates: a full-shape
// buffer for the standardized values and a per-row buffer (input shape minus
// the last, normalized dimension) for the standard deviation.
// NOTE(review): pop_back() assumes x has rank >= 1 — confirm 0-d inputs are
// rejected upstream.
void layer_norm_(Tensor y, const Tensor &x, const Tensor &weight, const Tensor &bias, float epsilon) {
    auto stats_shape = x->shape();
    stats_shape.pop_back();
    auto standardized = Tensor::empty(x->shape(), x->dtype(), x->device());
    auto deviation = Tensor::empty(stats_shape, x->dtype(), x->device());
    LayerNorm::execute(y, standardized, deviation, x, weight, bias, epsilon);
}
37+
38+
// pybind11-facing wrapper: gives the binding a single unambiguous function
// pointer instead of having to disambiguate the two layer_norm_ overloads.
void layer_norm_for_pybind(Tensor y, const Tensor &x, const Tensor &weight, const Tensor &bias, float epsilon) {
    layer_norm_(y, x, weight, bias, epsilon);
}
41+
42+
} // namespace infinicore::op
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
#include "infinicore/ops/layer_norm.hpp"

#include "../infiniop_impl.hpp"

// infiniop backend for the LayerNorm graph op: plan() builds/caches the
// kernel descriptor and snapshots operands, run() launches the kernel, and
// cleanup() releases the planned metadata.
namespace infinicore::op::layer_norm_impl::infiniop {

// Cachable descriptor type for LayerNorm (the macro's last argument, 100,
// presumably bounds the cache size — confirm against the macro definition).
INFINIOP_CACHABLE_DESCRIPTOR(Descriptor, LayerNorm, 100);

// Everything run() needs, captured once at plan time.
struct PlannedMeta {
    std::shared_ptr<Descriptor> descriptor;
    graph::GraphTensor workspace, y, standardization, std_deviation, x, weight, bias;
};

// Builds (or fetches from the cache) the infiniop descriptor for this exact
// operand configuration and returns a heap-allocated PlannedMeta.
void *plan(Tensor y, Tensor standardization, Tensor std_deviation, const Tensor &x, const Tensor &weight, const Tensor &bias, float epsilon) {
    // Cache key covers every operand plus epsilon.
    size_t seed = hash_combine(y, standardization, std_deviation, x, weight, bias, epsilon);

    INFINIOP_CACHABLE_DESCRIPTOR_GET_OR_CREATE(
        Descriptor, descriptor, LayerNorm,
        seed,
        y->desc(),
        standardization->desc(),
        std_deviation->desc(),
        x->desc(),
        weight->desc(),
        bias->desc(),
        epsilon);

    // Scratch buffer sized from the descriptor's workspace requirement.
    INFINIOP_WORKSPACE_TENSOR(workspace, LayerNorm, descriptor);

    return new PlannedMeta{
        descriptor,
        graph::GraphTensor(workspace),
        graph::GraphTensor(y),
        graph::GraphTensor(standardization),
        graph::GraphTensor(std_deviation),
        graph::GraphTensor(x),
        graph::GraphTensor(weight),
        graph::GraphTensor(bias)};
}

// Launches the planned kernel on the current stream.
void run(void *planned_meta) {
    auto planned = reinterpret_cast<PlannedMeta *>(planned_meta);

    // NOTE(review): the workspace size is passed as numel(); confirm
    // infiniopLayerNorm expects an element count here rather than a byte
    // count (workspace-size APIs commonly take bytes).
    INFINICORE_CHECK_ERROR(
        infiniopLayerNorm(
            planned->descriptor->desc,
            planned->workspace->data(),
            planned->workspace->numel(),
            planned->y->data(),
            planned->standardization->data(),
            planned->std_deviation->data(),
            planned->x->data(),
            planned->weight->data(),
            planned->bias->data(),
            context::getStream()));
}

// Frees the planned metadata and nulls the caller's pointer so it cannot be
// reused after cleanup.
void cleanup(void **planned_meta_ptr) {
    delete *reinterpret_cast<PlannedMeta **>(planned_meta_ptr);
    *planned_meta_ptr = nullptr;
}

INFINICORE_GRAPH_OP_REGISTER_ALLDEVICE(LayerNorm, &plan, &run, &cleanup);

} // namespace infinicore::op::layer_norm_impl::infiniop

src/infinicore/pybind11/ops.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
#include "ops/kron.hpp"
5555
#include "ops/kthvalue.hpp"
5656
#include "ops/kv_caching.hpp"
57+
#include "ops/layer_norm.hpp"
5758
#include "ops/ldexp.hpp"
5859
#include "ops/lerp.hpp"
5960
#include "ops/linear.hpp"
@@ -216,6 +217,7 @@ inline void bind(py::module &m) {
216217
bind_triplet_margin_loss(m);
217218
bind_selu(m);
218219
bind_sinh(m);
220+
bind_layer_norm(m);
219221
}
220222

221223
} // namespace infinicore::ops
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#pragma once

#include <pybind11/pybind11.h>

#include "infinicore/ops/layer_norm.hpp"

namespace py = pybind11;

namespace infinicore::ops {

// Registers the layer_norm (out-of-place) and layer_norm_ (in-place)
// functions on the python extension module.
inline void bind_layer_norm(py::module &m) {
    // Out-of-place binding: allocates and returns the output tensor.
    m.def("layer_norm",
          &op::layer_norm,
          py::arg("x"),
          py::arg("weight"),
          py::arg("bias"),
          py::arg("epsilon") = 1e-5f,
          R"doc(Layer Normalization.

Args:
    x: Input tensor
    weight: Scale weights
    bias: Bias weights
    epsilon: Small constant for numerical stability, default is 1e-5

Returns:
    Normalized tensor with same shape as input
)doc");

    // In-place binding: bound to the pybind-specific wrapper so overload
    // resolution between the two C++ layer_norm_ signatures is unambiguous.
    m.def("layer_norm_",
          &op::layer_norm_for_pybind,
          py::arg("y"),
          py::arg("x"),
          py::arg("weight"),
          py::arg("bias"),
          py::arg("epsilon") = 1e-5f,
          R"doc(In-place Layer Normalization.

Args:
    y: Output tensor
    x: Input tensor
    weight: Scale weights
    bias: Bias weights
    epsilon: Small constant for numerical stability, default is 1e-5
)doc");
}

} // namespace infinicore::ops

src/infiniop/devices/metax/metax_kernel_common.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
#include <maca_bfloat16.h>
66
#include <maca_fp16.h>
77
#include <maca_fp8.h>
8+
#include <mccub/block/block_reduce.cuh>
89
#else
10+
#include <hccub/block/block_reduce.cuh>
911
#include <hpcc_bfloat16.h>
1012
#include <hpcc_fp16.h>
1113
#include <hpcc_fp8.h>

0 commit comments

Comments
 (0)