InfiniTensor
diff --git a/‎include/infinicore/nn.hpp‎
Lines changed: 2 additions & 0 deletions b/‎include/infinicore/nn.hpp‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎include/infinicore/nn/embedding.hpp‎
Lines changed: 94 additions & 0 deletions b/‎include/infinicore/nn/embedding.hpp‎
Lines changed: 94 additions & 0 deletions
diff --git a/‎include/infinicore/nn/rmsnorm.hpp‎
Lines changed: 80 additions & 0 deletions b/‎include/infinicore/nn/rmsnorm.hpp‎
Lines changed: 80 additions & 0 deletions
@@ -1,3 +1,5 @@
 #pragma once
 
+#include "nn/embedding.hpp"
 #include "nn/linear.hpp"
+#include "nn/rmsnorm.hpp"
@@ -0,0 +1,94 @@
+#pragma once
+
+#include "module.hpp"
+#include "../ops.hpp"
+#include <optional>
+
+namespace infinicore::nn {
+
+/**
+ * @brief Embedding layer that maps integer indices to dense vectors.
+ *
+ * A lookup table of num_embeddings vectors, each embedding_dim long, typically
+ * used for word/token embeddings. The constructor, forward(), from_pretrained()
+ * and extra_repr() are only declared here; the table itself is presumably the
+ * public weight_ parameter (confirm against the out-of-line definitions).
+ *
+ * Modelled on PyTorch's nn.Embedding:
+ * https://pytorch.org/docs/stable/generated/torch.nn.Embedding.html
+ *
+ * Example:
+ * @code
+ *   // Build a table of 10000 entries, each a 300-dimensional vector
+ *   auto embedding = Embedding(10000, 300);
+ *
+ *   // Indices laid out as [batch_size, seq_len] = [2, 5]
+ *   auto indices = Tensor::from_data({2, 5}, {3, 5, 12, 8, 99, 0, 1, 45, 67, 23});
+ *
+ *   // Result gains a trailing embedding_dim axis: [2, 5, 300]
+ *   auto embeddings = embedding.forward(indices);
+ * @endcode
+ */
+class Embedding : public Module {
+public:
+    /**
+     * @brief Construct an Embedding layer.
+     *
+     * @param num_embeddings Number of rows in the table (vocabulary size)
+     * @param embedding_dim Length of each embedding vector
+     * @param padding_idx Optional padding index; defaults to std::nullopt (none).
+     *                    NOTE(review): PyTorch-style "no gradient for this row" is assumed - confirm
+     * @param device Device to create the embedding weight on; defaults to Device()
+     */
+    Embedding(size_t num_embeddings,
+              size_t embedding_dim,
+              std::optional<int64_t> padding_idx = std::nullopt,
+              const Device &device = Device());
+
+    /**
+     * @brief Forward pass: look up the embedding vector for every index (const method).
+     *
+     * @param indices Tensor containing indices into the embedding matrix.
+     *                Documented as any shape (*), typically [batch_size] or [batch_size, seq_len]
+     * @return Tensor containing the embedding vectors.
+     *         Documented shape: (*, embedding_dim), where * matches the input shape
+     *
+     * Example:
+     *   Input shape: [2, 3] -> Output shape: [2, 3, embedding_dim]
+     *   Input shape: [10] -> Output shape: [10, embedding_dim]
+     */
+    Tensor forward(const Tensor &indices) const;
+
+    /**
+     * @brief Static factory: build an Embedding from pretrained vectors.
+     *
+     * @param embeddings Pretrained embedding matrix of shape [num_embeddings, embedding_dim]
+     * @param freeze Defaults to true; meant to keep embeddings fixed in training (TODO confirm)
+     * @param padding_idx Optional padding index; defaults to std::nullopt (none)
+     * @return Embedding module, returned by value, initialized with the pretrained vectors
+     */
+    static Embedding from_pretrained(const Tensor &embeddings,
+                                     bool freeze = true,
+                                     std::optional<int64_t> padding_idx = std::nullopt);
+
+    // Parameter accessor: hands back weight_ as a Tensor, returned by value
+    Tensor weight() const { return weight_; }
+
+    // Read-only getters for the configuration held in the private fields below
+    size_t num_embeddings() const { return num_embeddings_; }
+    size_t embedding_dim() const { return embedding_dim_; }
+    std::optional<int64_t> padding_idx() const { return padding_idx_; }
+
+    // Extra text for the module's string representation (declared only; defined elsewhere)
+    std::string extra_repr() const;
+
+    // Public field: the weight parameter is also reachable directly as a member
+    Parameter weight_;
+
+private:
+    size_t num_embeddings_;   // Number of table rows, returned by num_embeddings()
+    size_t embedding_dim_;    // Length of each vector, returned by embedding_dim()
+    std::optional<int64_t> padding_idx_;  // Padding index if any, returned by padding_idx()
+};
+
+} // namespace infinicore::nn
@@ -0,0 +1,80 @@
+#pragma once
+
+#include "module.hpp"
+#include "../ops.hpp"
+
+namespace infinicore::nn {
+
+/**
+ * @brief Root Mean Square Layer Normalization (RMSNorm).
+ *
+ * Documented to normalize over the last dimension of the input. Unlike LayerNorm
+ * there is no mean subtraction, and the only parameter declared here is weight_ (no bias).
+ *
+ * Formula: y = (x / RMS(x)) * weight
+ * where RMS(x) = sqrt(mean(x^2) + eps), with the mean taken over the last dimension
+ *
+ * Used in LLaMA, Galactica, and other modern language models as a
+ * simpler and faster alternative to LayerNorm.
+ *
+ * Reference:
+ * - "Root Mean Square Layer Normalization" (https://arxiv.org/abs/1910.07467)
+ * - LLaMA implementation: https://github.com/facebookresearch/llama
+ *
+ * Example:
+ * @code
+ *   // Normalization layer for a hidden size of 4096
+ *   auto norm = RMSNorm(4096);
+ *
+ *   // Input laid out as [batch, seq_len, hidden_size]
+ *   auto input = Tensor::randn({2, 10, 4096});
+ *
+ *   // Output keeps the input layout: [batch, seq_len, hidden_size]
+ *   auto output = norm.forward(input);
+ * @endcode
+ */
+class RMSNorm : public Module {
+public:
+    /**
+     * @brief Construct an RMSNorm layer.
+     *
+     * @param normalized_shape Size of the feature dimension to normalize (typically hidden_size)
+     * @param eps Small constant for numerical stability; defaults to 1e-6
+     * @param device Device to create the weight on; defaults to Device()
+     */
+    RMSNorm(size_t normalized_shape,
+            double eps = 1e-6,
+            const Device &device = Device());
+
+    /**
+     * @brief Forward pass: apply RMSNorm to x (const method, defined out of line).
+     *
+     * @param x Input tensor of shape (*, normalized_shape) where * is any number of dimensions
+     * @return Normalized tensor, documented to have the same shape as the input
+     *
+     * The normalization is documented as acting on the last dimension only.
+     * For example:
+     *   Input: [batch, seq_len, hidden_size] -> normalize over hidden_size
+     *   Input: [batch, hidden_size] -> normalize over hidden_size
+     */
+    Tensor forward(const Tensor &x) const;
+
+    // Parameter accessor: hands back weight_ as a Tensor, returned by value
+    Tensor weight() const { return weight_; }
+
+    // Read-only getters for the configuration held in the private fields below
+    size_t normalized_shape() const { return normalized_shape_; }
+    double eps() const { return eps_; }
+
+    // Extra text for the module's string representation (declared only; defined elsewhere)
+    std::string extra_repr() const;
+
+    // Public field: the weight parameter is also reachable directly as a member
+    Parameter weight_;
+
+private:
+    size_t normalized_shape_;  // Feature-dimension size, returned by normalized_shape()
+    double eps_;               // Stability epsilon, returned by eps()
+};
+
+} // namespace infinicore::nn