Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions include/infinicore/nn/layer_norm.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#pragma once

#include "../ops.hpp"
#include "module.hpp"

namespace infinicore::nn {

/**
 * @brief Layer Normalization module.
 *
 * Owns learnable `weight` (scale) and `bias` (shift) parameters, each of
 * shape {normalized_shape}, and normalizes the last dimension of the input.
 */
class LayerNorm : public Module {
public:
    /**
     * @brief Construct a LayerNorm layer
     *
     * @param normalized_shape Size of the feature dimension to normalize (typically hidden_size)
     * @param eps Small constant for numerical stability (default: 1e-6)
     * @param dtype Data type for the weight (default: DataType::F32)
     * @param device Device to create the weight on
     *
     * NOTE(review): the default eps here (1e-6) differs from the op-level
     * op::layer_norm default (1e-5) — confirm the discrepancy is intentional.
     */
    LayerNorm(size_t normalized_shape,
              double eps = 1e-6,
              const DataType &dtype = DataType::F32,
              const Device &device = Device());

    /**
     * @brief Forward pass: apply LayerNorm
     *
     * @param x Input tensor of shape (*, normalized_shape) where * is any number of dimensions
     * @return Normalized tensor with same shape as input
     *
     * The normalization is applied over the last dimension.
     * For example:
     *   Input: [batch, seq_len, hidden_size] -> normalize over hidden_size
     *   Input: [batch, hidden_size]          -> normalize over hidden_size
     */
    Tensor forward(const Tensor &x) const;

    // Module information
    size_t normalized_shape() const { return normalized_shape_; } // feature-dim size
    double eps() const { return eps_; }                           // numerical-stability epsilon
    DataType dtype() const { return dtype_; }                     // parameter data type

    // Human-readable summary of the layer's configuration
    std::string extra_repr() const;

    // Accessors for the parameters (both of shape {normalized_shape})
    Tensor weight() const { return weight_; }
    Tensor bias() const { return bias_; }

protected:
    // Parameters registered with the Module machinery
    INFINICORE_NN_PARAMETER(weight);
    INFINICORE_NN_PARAMETER(bias);

private:
    size_t normalized_shape_; // Size of the feature dimension
    double eps_;              // Epsilon for numerical stability
    DataType dtype_;          // Data type for weight
};

} // namespace infinicore::nn
1 change: 1 addition & 0 deletions include/infinicore/ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "ops/hardswish.hpp"
#include "ops/hardtanh.hpp"
#include "ops/kv_caching.hpp"
#include "ops/layer_norm.hpp"
#include "ops/matmul.hpp"
#include "ops/ones.hpp"
#include "ops/paged_attention.hpp"
Expand Down
16 changes: 16 additions & 0 deletions include/infinicore/ops/layer_norm.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "../graph/graph.hpp"
#include "common/op.hpp"

namespace infinicore::op {

// Graph-op class; execute signature is
// (y, standardization, std_deviation, x, weight, bias, epsilon).
INFINICORE_GRAPH_OP_CLASS(LayerNorm, Tensor, Tensor, Tensor, const Tensor &, const Tensor &, const Tensor &, float);

// Out-of-place LayerNorm over the last dimension of `x`; allocates and returns the result.
Tensor layer_norm(const Tensor &x, const Tensor &weight, const Tensor &bias, float epsilon = 1e-5f);
// Full in-place variant: caller supplies the output `y` plus the intermediate buffers
// `standardization` (same shape as x) and `std_deviation` (x's shape minus the last dim).
void layer_norm_(Tensor y, Tensor standardization, Tensor std_deviation, const Tensor &x, const Tensor &weight, const Tensor &bias, float epsilon = 1e-5f);
// Convenience in-place variant: allocates the intermediate buffers internally.
void layer_norm_(Tensor y, const Tensor &x, const Tensor &weight, const Tensor &bias, float epsilon = 1e-5f);
// Non-overloaded alias of the convenience variant so pybind11 can bind it unambiguously.
void layer_norm_for_pybind(Tensor y, const Tensor &x, const Tensor &weight, const Tensor &bias, float epsilon = 1e-5f);

} // namespace infinicore::op
2 changes: 2 additions & 0 deletions python/infinicore/nn/functional/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from .hinge_embedding_loss import hinge_embedding_loss
from .huber_loss import huber_loss
from .interpolate import interpolate
from .layer_norm import layer_norm
from .linear import linear
from .linear_w8a8i8 import linear_w8a8i8
from .log_softmax import log_softmax
Expand Down Expand Up @@ -83,4 +84,5 @@
"softplus",
"softsign",
"huber_loss",
"layer_norm",
]
33 changes: 33 additions & 0 deletions python/infinicore/nn/functional/layer_norm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from typing import List

from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def layer_norm(
    input: Tensor,
    normalized_shape: List[int],
    weight: Tensor,
    bias: Tensor,
    eps: float = 1e-5,
    *,
    out=None,
) -> Tensor:
    r"""Apply Layer Normalization over the trailing dimension(s) of ``input``.

    Args:
        input: Input tensor of shape ``(*, normalized_shape)``.
        normalized_shape: Expected shape of the normalized dimensions; must
            match ``weight.shape``.
        weight: Scale parameter.
        bias: Shift parameter.
        eps: Small constant added for numerical stability.
        out: Optional pre-allocated output tensor; written in place when given.

    Returns:
        The normalized tensor (``out`` if provided, otherwise a new tensor).
    """

    # Compare as lists so a tuple-vs-list mismatch (e.g. caller passes a
    # tuple while weight.shape is a list, or vice versa) does not trip a
    # spurious assertion for otherwise-equal shapes.
    assert list(normalized_shape) == list(weight.shape), (
        "normalized_shape does not match weight.shape."
    )

    if out is None:
        return Tensor(
            _infinicore.layer_norm(
                input._underlying, weight._underlying, bias._underlying, eps
            )
        )

    _infinicore.layer_norm_(
        out._underlying, input._underlying, weight._underlying, bias._underlying, eps
    )

    return out
27 changes: 27 additions & 0 deletions src/infinicore/nn/layer_norm.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#include "infinicore/nn/layer_norm.hpp"
#include "infinicore/ops.hpp"
#include <cmath>
#include <stdexcept>

namespace infinicore::nn {

// Construct the layer and allocate its learnable parameters.
LayerNorm::LayerNorm(size_t normalized_shape, double eps, const DataType &dtype, const Device &device)
    : normalized_shape_(normalized_shape),
      eps_(eps),
      dtype_(dtype) {

    // Module-owned device member; parameters below are created on it.
    device_ = device;

    // Learnable scale and shift, each of shape {normalized_shape}.
    INFINICORE_NN_PARAMETER_INIT(weight, ({normalized_shape}, dtype_, device));
    INFINICORE_NN_PARAMETER_INIT(bias, ({normalized_shape}, dtype_, device));
}

// Normalize the last dimension of `x` using the learned weight and bias.
// eps is stored as double for precision but the op API takes float.
Tensor LayerNorm::forward(const Tensor &x) const {
    return op::layer_norm(x, weight_, bias_, static_cast<float>(eps_));
}

// Human-readable configuration summary, e.g.
// "LayerNorm(normalized_shape=768, eps=0.000001, dtype=2)".
// Note: dtype is rendered as its underlying integer value.
std::string LayerNorm::extra_repr() const {
    std::string repr{"LayerNorm(normalized_shape="};
    repr += std::to_string(normalized_shape_);
    repr += ", eps=";
    repr += std::to_string(eps_);
    repr += ", dtype=";
    repr += std::to_string(static_cast<int>(dtype_));
    repr += ")";
    return repr;
}

} // namespace infinicore::nn
42 changes: 42 additions & 0 deletions src/infinicore/ops/layer_norm/layer_norm.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#include "infinicore/ops/layer_norm.hpp"
#include "../../utils.hpp"

namespace infinicore::op {
INFINICORE_GRAPH_OP_DISPATCHERS_IMPL(LayerNorm);

// Construct the graph op and dispatch to the device-specific implementation.
LayerNorm::LayerNorm(Tensor y, Tensor standardization, Tensor std_deviation, const Tensor &x, const Tensor &weight, const Tensor &bias, float epsilon) {
    // `bias` is read by the kernel too, so it must be on the same device as
    // the other operands (the original check omitted it).
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(y, standardization, std_deviation, x, weight, bias);
    INFINICORE_GRAPH_OP_DISPATCH(y->device().getType(), y, standardization, std_deviation, x, weight, bias, epsilon);
}

// Either record the op into the active graph or run it immediately,
// depending on the current graph-capture state.
void LayerNorm::execute(Tensor y, Tensor standardization, Tensor std_deviation, const Tensor &x, const Tensor &weight, const Tensor &bias, float epsilon) {
    INFINICORE_GRAPH_OP_RECORD_OR_RUN(LayerNorm, y, standardization, std_deviation, x, weight, bias, epsilon);
}

/**
 * Out-of-place LayerNorm: allocates the output tensor, then delegates to the
 * convenience in-place overload so the intermediate-buffer allocation logic
 * (standardization / std_deviation) lives in exactly one place instead of
 * being duplicated here.
 */
Tensor layer_norm(const Tensor &x, const Tensor &weight, const Tensor &bias, float epsilon) {
    auto y = Tensor::empty(x->shape(), x->dtype(), x->device());
    layer_norm_(y, x, weight, bias, epsilon);
    return y;
}

// Full in-place variant: caller provides the output and both intermediate
// buffers (useful when the caller wants to reuse or inspect them).
void layer_norm_(Tensor y, Tensor standardization, Tensor std_deviation, const Tensor &x, const Tensor &weight, const Tensor &bias, float epsilon) {
    LayerNorm::execute(y, standardization, std_deviation, x, weight, bias, epsilon);
}

// Convenience in-place variant: allocates the two intermediate buffers
// internally before executing the op.
void layer_norm_(Tensor y, const Tensor &x, const Tensor &weight, const Tensor &bias, float epsilon) {
    // The per-row statistics drop the last (normalized) dimension.
    auto stats_shape = x->shape();
    stats_shape.pop_back();
    auto std_deviation = Tensor::empty(stats_shape, x->dtype(), x->device());
    // Standardized values have the same shape as the input.
    auto standardization = Tensor::empty(x->shape(), x->dtype(), x->device());
    LayerNorm::execute(y, standardization, std_deviation, x, weight, bias, epsilon);
}

// Non-overloaded entry point: pybind11 cannot bind an overload set by name,
// so the binding layer targets this thin forwarder instead.
void layer_norm_for_pybind(Tensor y, const Tensor &x, const Tensor &weight, const Tensor &bias, float epsilon) {
    layer_norm_(y, x, weight, bias, epsilon);
}

} // namespace infinicore::op
65 changes: 65 additions & 0 deletions src/infinicore/ops/layer_norm/layer_norm_infiniop.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#include "infinicore/ops/layer_norm.hpp"

#include "../infiniop_impl.hpp"

namespace infinicore::op::layer_norm_impl::infiniop {

// Cachable infiniop descriptor type for LayerNorm (cache capacity 100).
INFINIOP_CACHABLE_DESCRIPTOR(Descriptor, LayerNorm, 100);

// Everything captured at plan time and consumed by run(): the cached infiniop
// descriptor plus graph handles for the workspace and every operand tensor.
struct PlannedMeta {
    std::shared_ptr<Descriptor> descriptor;
    graph::GraphTensor workspace, y, standardization, std_deviation, x, weight, bias;
};

/**
 * Plan a LayerNorm launch: fetch (or build) the infiniop descriptor matching
 * these tensor descriptors, allocate the workspace, and capture graph handles
 * to every operand. Returns a heap-allocated PlannedMeta that cleanup() frees.
 */
void *plan(Tensor y, Tensor standardization, Tensor std_deviation, const Tensor &x, const Tensor &weight, const Tensor &bias, float epsilon) {
    // Cache key covers all operands plus epsilon, so a different epsilon
    // yields a different descriptor.
    size_t seed = hash_combine(y, standardization, std_deviation, x, weight, bias, epsilon);

    INFINIOP_CACHABLE_DESCRIPTOR_GET_OR_CREATE(
        Descriptor, descriptor, LayerNorm,
        seed,
        y->desc(),
        standardization->desc(),
        std_deviation->desc(),
        x->desc(),
        weight->desc(),
        bias->desc(),
        epsilon);

    // Workspace tensor sized per the descriptor's requirements.
    INFINIOP_WORKSPACE_TENSOR(workspace, LayerNorm, descriptor);

    return new PlannedMeta{
        descriptor,
        graph::GraphTensor(workspace),
        graph::GraphTensor(y),
        graph::GraphTensor(standardization),
        graph::GraphTensor(std_deviation),
        graph::GraphTensor(x),
        graph::GraphTensor(weight),
        graph::GraphTensor(bias)};
}

// Execute a previously planned LayerNorm on the current stream.
void run(void *planned_meta) {
    auto planned = reinterpret_cast<PlannedMeta *>(planned_meta);

    INFINICORE_CHECK_ERROR(
        infiniopLayerNorm(
            planned->descriptor->desc,
            planned->workspace->data(),
            // NOTE(review): this passes the workspace's element count as its
            // size — confirm infiniopLayerNorm expects elements, not bytes.
            planned->workspace->numel(),
            planned->y->data(),
            planned->standardization->data(),
            planned->std_deviation->data(),
            planned->x->data(),
            planned->weight->data(),
            planned->bias->data(),
            context::getStream()));
}

// Free the PlannedMeta allocated by plan() and null out the caller's pointer
// so a double-cleanup becomes a harmless no-op (delete on nullptr).
void cleanup(void **planned_meta_ptr) {
    auto **meta = reinterpret_cast<PlannedMeta **>(planned_meta_ptr);
    delete *meta;
    *meta = nullptr;
}

// Register this implementation for every device type.
INFINICORE_GRAPH_OP_REGISTER_ALLDEVICE(LayerNorm, &plan, &run, &cleanup);

} // namespace infinicore::op::layer_norm_impl::infiniop
2 changes: 2 additions & 0 deletions src/infinicore/pybind11/ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
#include "ops/kron.hpp"
#include "ops/kthvalue.hpp"
#include "ops/kv_caching.hpp"
#include "ops/layer_norm.hpp"
#include "ops/ldexp.hpp"
#include "ops/lerp.hpp"
#include "ops/linear.hpp"
Expand Down Expand Up @@ -216,6 +217,7 @@ inline void bind(py::module &m) {
bind_triplet_margin_loss(m);
bind_selu(m);
bind_sinh(m);
bind_layer_norm(m);
}

} // namespace infinicore::ops
48 changes: 48 additions & 0 deletions src/infinicore/pybind11/ops/layer_norm.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#pragma once

#include <pybind11/pybind11.h>

#include "infinicore/ops/layer_norm.hpp"

namespace py = pybind11;

namespace infinicore::ops {

// Register the out-of-place (layer_norm) and in-place (layer_norm_) Python
// bindings. The in-place binding targets layer_norm_for_pybind because
// pybind11 cannot resolve the C++ overload set by name.
inline void bind_layer_norm(py::module &m) {
    m.def("layer_norm",
          &op::layer_norm,
          py::arg("x"),
          py::arg("weight"),
          py::arg("bias"),
          py::arg("epsilon") = 1e-5f,
          R"doc(Layer Normalization.

Args:
    x: Input tensor
    weight: Scale weights
    bias: Bias weights
    epsilon: Small constant for numerical stability, default is 1e-5

Returns:
    Normalized tensor with same shape as input
)doc");

    m.def("layer_norm_",
          &op::layer_norm_for_pybind,
          py::arg("y"),
          py::arg("x"),
          py::arg("weight"),
          py::arg("bias"),
          py::arg("epsilon") = 1e-5f,
          R"doc(In-place Layer Normalization.

Args:
    y: Output tensor
    x: Input tensor
    weight: Scale weights
    bias: Bias weights
    epsilon: Small constant for numerical stability, default is 1e-5
)doc");
}

} // namespace infinicore::ops
2 changes: 2 additions & 0 deletions src/infiniop/devices/metax/metax_kernel_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
#include <maca_bfloat16.h>
#include <maca_fp16.h>
#include <maca_fp8.h>
#include <mccub/block/block_reduce.cuh>
#else
#include <hccub/block/block_reduce.cuh>
#include <hpcc_bfloat16.h>
#include <hpcc_fp16.h>
#include <hpcc_fp8.h>
Expand Down
Loading
Loading