InfiniTensor
diff --git a/include/infinicore/nn/module.hpp b/include/infinicore/nn/module.hpp
Lines changed: 14 additions & 0 deletions
diff --git a/include/infinicore/nn/rope.hpp b/include/infinicore/nn/rope.hpp
Lines changed: 79 additions & 0 deletions
diff --git a/include/infinicore/ops.hpp b/include/infinicore/ops.hpp
Lines changed: 1 addition & 0 deletions
diff --git a/include/infinicore/ops/rope.hpp b/include/infinicore/ops/rope.hpp
Lines changed: 21 additions & 0 deletions
diff --git a/src/infinicore-test/main.cc b/src/infinicore-test/main.cc
Lines changed: 7 additions & 17 deletions
@@ -23,6 +23,8 @@ class Module {
 protected:
     Tensor register_parameter(const std::string &name, Parameter param);
 
+    Tensor register_buffer(const std::string &name, Parameter buffer); // Buffer counterpart of register_parameter (same parameter/return types); presumably records `buffer` in buffers_ under `name` - confirm in the implementation
+
     // Add an existing submodule to this module's hierarchy
     // Template parameter M must be a type derived from Module
     // Returns the submodule for convenience (allows method chaining)
@@ -72,6 +74,7 @@ class Module {
 protected:
     Device device_;
     std::unordered_map<std::string, std::shared_ptr<Module>> submodules_;
+    std::unordered_map<std::string, Parameter> buffers_; // Name -> buffer map, kept separate from parameters_
     std::unordered_map<std::string, Parameter> parameters_;
 
 private:
@@ -134,4 +137,15 @@ class Module {
     name##_ = infinicore::nn::Parameter args; \
     this->register_parameter(#name, name##_)
 
+// Declare a buffer member variable: expands to `infinicore::nn::Parameter <name>_` (note the trailing underscore)
+#define INFINICORE_NN_BUFFER(name) \
+    infinicore::nn::Parameter name##_
+
+// Initialize a buffer in constructor
+// Usage: INFINICORE_NN_BUFFER_INIT(name, (shape, dtype, device))
+// Example: INFINICORE_NN_BUFFER_INIT(cache, ({max_seq_len, head_dim}, DataType::F32, device))
+#define INFINICORE_NN_BUFFER_INIT(name, args) \
+    name##_ = infinicore::nn::Parameter args; \
+    this->register_buffer(#name, name##_)
+
 } // namespace infinicore::nn
@@ -0,0 +1,79 @@
+#pragma once
+
+#include "module.hpp"
+#include "../context/context.hpp"
+#include "../tensor.hpp"
+#include <memory>
+
+namespace infinicore::nn {
+
+class RoPE : public Module { // Rotary position embedding (RoPE) module; declares sin/cos cache tables as buffer members
+public:
+    /**
+     * @brief RoPE algorithm variant selector (underlying values 0 and 1 are fixed explicitly)
+     */
+    enum class Algo {
+        GPT_J = 0,    // GPT-J style RoPE algorithm (interleaved even and odd dimensions)
+        GPT_NEOX = 1, // GPT-NeoX style RoPE algorithm (presumably pairs dimension i with i + head_dim/2, i.e. first half with second half - TODO confirm against the kernel)
+    };
+
+    /**
+     * @brief Construct a RoPE layer
+     *
+     * @param head_dim Dimension of each attention head (documented requirement: must be even; enforcement is not visible in this header)
+     * @param max_seq_len Maximum sequence length for the pre-computed cache
+     * @param theta Base frequency for rotary embeddings (default: 10000.0)
+     * @param algo RoPE algorithm type (default: Algo::GPT_J)
+     * @param dtype Data type for the sin/cos cache (default: DataType::F32)
+     * @param device Device to create the cache on (default: a default-constructed Device)
+     */
+    RoPE(size_t head_dim,
+         size_t max_seq_len,
+         double theta = 10000.0,
+         Algo algo = Algo::GPT_J,
+         const DataType &dtype = DataType::F32,
+         const Device &device = Device());
+
+    /**
+     * @brief Forward pass: apply RoPE to a tensor (const member: does not modify the module)
+     *
+     * @param x Input tensor of shape (..., head_dim) where ... is any number of leading dimensions
+     * @param pos Position IDs tensor of shape (*,), typically [seq_len] or [batch, seq_len]
+     * @return Rotated tensor with the same shape as the input
+     *
+     * Applies rotary position embeddings to the input tensor.
+     * For attention mechanisms, call this method separately for the query and key tensors.
+     *
+     * Common input shapes:
+     *   - [batch, num_heads, seq_len, head_dim]
+     *   - [batch, seq_len, num_heads, head_dim]
+     *   - [seq_len, head_dim]
+     */
+    Tensor forward(const Tensor &x, const Tensor &pos) const;
+
+    // Module information: read-only accessors returning the stored configuration by value
+    size_t head_dim() const { return head_dim_; }
+    size_t max_seq_len() const { return max_seq_len_; }
+    double theta() const { return theta_; }
+    Algo algo() const { return algo_; }
+    DataType dtype() const { return dtype_; }
+
+    // String representation (declared here; the implementation is not visible in this header)
+    std::string extra_repr() const;
+
+protected:
+    // Buffers (sin and cos cache tables) - not exposed in state_dict; each macro declares a Parameter member named <name>_
+    INFINICORE_NN_BUFFER(sin_cache);
+    INFINICORE_NN_BUFFER(cos_cache);
+
+private:
+    void initialize_cache(); // Presumably fills sin_cache_ / cos_cache_; defined outside this header - confirm
+
+    size_t head_dim_;      // Dimension of each attention head
+    size_t max_seq_len_;   // Maximum sequence length
+    double theta_;         // Base frequency for rotary embeddings
+    Algo algo_;            // RoPE algorithm type
+    DataType dtype_;       // Data type for cache tables
+};
+
+} // namespace infinicore::nn
@@ -7,5 +7,6 @@
 #include "ops/ones.hpp"
 #include "ops/rearrange.hpp"
 #include "ops/rms_norm.hpp"
+#include "ops/rope.hpp"
 #include "ops/silu.hpp"
 #include "ops/swiglu.hpp"
@@ -0,0 +1,21 @@
+#pragma once
+
+#include "../device.hpp"
+#include "../tensor.hpp"
+#include "../nn/rope.hpp"
+#include "common/op.hpp"
+
+namespace infinicore::op {
+class RoPE { // Operator entry for RoPE: implementation signature (schema), execute(), and the dispatcher accessor
+public:
+    using schema = void (*)(Tensor, const Tensor &, const Tensor &, const Tensor &, const Tensor &, infinicore::nn::RoPE::Algo); // Function-pointer type with the same parameter list as execute()
+    static void execute(Tensor x_out, const Tensor &x, const Tensor &pos, const Tensor &sin_cache, const Tensor &cos_cache, infinicore::nn::RoPE::Algo algo); // Returns void; x_out is presumably the destination tensor - confirm in the implementation
+    static common::OpDispatcher<schema> &dispatcher(); // Returns a reference to the OpDispatcher specialised on schema
+};
+
+// Internal function: returns void and takes x_out as its first argument (presumably the output tensor it fills - confirm)
+void rope_(Tensor x_out, const Tensor &x, const Tensor &pos, const Tensor &sin_cache, const Tensor &cos_cache, infinicore::nn::RoPE::Algo algo);
+
+// Public API that uses infinicore::nn::RoPE::Algo: takes no output argument and returns the result as a Tensor
+Tensor rope(const Tensor &x, const Tensor &pos, const Tensor &sin_cache, const Tensor &cos_cache, infinicore::nn::RoPE::Algo algo);
+} // namespace infinicore::op
@@ -141,12 +141,8 @@ ParsedArgs parseArgs(int argc, char *argv[]) {
 
 int main(int argc, char *argv[]) {
     try {
-        // Initialize spdlog for debugging
-        spdlog::set_level(spdlog::level::debug);
-        spdlog::info("Starting InfiniCore Memory Management Test Suite");
-
         ParsedArgs args = parseArgs(argc, argv);
-        spdlog::debug("Arguments parsed successfully");
+        spdlog::info("Arguments parsed successfully");
 
         std::cout << "==============================================\n"
                   << "InfiniCore Memory Management Test Suite\n"
@@ -156,31 +152,25 @@ int main(int argc, char *argv[]) {
                   << "Iterations: " << args.iterations << "\n"
                   << "==============================================" << std::endl;
 
-        spdlog::debug("About to initialize InfiniCore context");
+        spdlog::info("About to initialize InfiniCore context");
         // Initialize InfiniCore context
         infinicore::context::setDevice(infinicore::Device(static_cast<infinicore::Device::Type>(args.device_type), 0));
-        spdlog::debug("InfiniCore context initialized successfully");
+        spdlog::info("InfiniCore context initialized successfully");
 
-        spdlog::debug("Creating test runner");
+        spdlog::info("Creating test runner");
         // Create test runner
         infinicore::test::InfiniCoreTestRunner runner;
-        spdlog::debug("Test runner created successfully");
+        spdlog::info("Test runner created successfully");
 
         // Add tests based on arguments
         if (args.run_basic) {
-            spdlog::debug("Adding BasicMemoryTest");
             runner.addTest(std::make_unique<infinicore::test::BasicMemoryTest>());
-            spdlog::debug("BasicMemoryTest added successfully");
 
-            spdlog::debug("Adding TensorDestructorTest");
             runner.addTest(std::make_unique<infinicore::test::TensorDestructorTest>());
-            spdlog::debug("TensorDestructorTest added successfully");
         }
 
         if (args.run_module) {
-            spdlog::debug("Adding NNModuleTest");
             runner.addTest(std::make_unique<infinicore::test::NNModuleTest>());
-            spdlog::debug("NNModuleTest added successfully");
         }
 
         if (args.run_concurrency) {
@@ -203,10 +193,10 @@ int main(int argc, char *argv[]) {
             runner.addTest(std::make_unique<infinicore::test::StressTest>());
         }
 
-        spdlog::debug("About to run all tests");
+        spdlog::info("About to run all tests");
         // Run all tests
         auto results = runner.runAllTests();
-        spdlog::debug("All tests completed");
+        spdlog::info("All tests completed");
 
         // Count results and collect failed tests
         size_t passed = 0, failed = 0;