diff --git a/include/infinicore/ops.hpp b/include/infinicore/ops.hpp index e66df4659..23d2457a8 100644 --- a/include/infinicore/ops.hpp +++ b/include/infinicore/ops.hpp @@ -1,5 +1,8 @@ #pragma once -#include "op/matmul.hpp" -#include "op/ones.hpp" -#include "op/rearrange.hpp" +#include "ops/add.hpp" +#include "ops/attention.hpp" +#include "ops/matmul.hpp" +#include "ops/ones.hpp" +#include "ops/rearrange.hpp" +#include "ops/rms_norm.hpp" diff --git a/include/infinicore/ops/add.hpp b/include/infinicore/ops/add.hpp new file mode 100644 index 000000000..1dd5df0ff --- /dev/null +++ b/include/infinicore/ops/add.hpp @@ -0,0 +1,17 @@ +#pragma once + +#include "../device.hpp" +#include "common/op.hpp" + +namespace infinicore::op { +class Add { +public: + using schema = void (*)(Tensor, Tensor, Tensor); + static void execute(Tensor c, Tensor a, Tensor b); + static common::OpDispatcher &dispatcher(); +}; + +Tensor add(Tensor a, Tensor b); +void add_(Tensor c, Tensor a, Tensor b); +Tensor operator+(Tensor a, Tensor b); +} // namespace infinicore::op diff --git a/include/infinicore/ops/attention.hpp b/include/infinicore/ops/attention.hpp new file mode 100644 index 000000000..1bc447c77 --- /dev/null +++ b/include/infinicore/ops/attention.hpp @@ -0,0 +1,16 @@ +#pragma once + +#include "../device.hpp" +#include "common/op.hpp" + +namespace infinicore::op { +class Attention { +public: + using schema = void (*)(Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, size_t); + static void execute(Tensor out, Tensor q, Tensor k, Tensor v, Tensor k_cache, Tensor v_cache, size_t pos); + static common::OpDispatcher &dispatcher(); +}; + +Tensor attention(Tensor q, Tensor k, Tensor v, Tensor k_cache, Tensor v_cache, size_t pos); +void attention_(Tensor out, Tensor q, Tensor k, Tensor v, Tensor k_cache, Tensor v_cache, size_t pos); +} // namespace infinicore::op diff --git a/include/infinicore/op/common/cache.hpp b/include/infinicore/ops/common/cache.hpp similarity index 100% rename from include/infinicore/op/common/cache.hpp rename to include/infinicore/ops/common/cache.hpp diff --git a/include/infinicore/op/common/dispatcher.hpp b/include/infinicore/ops/common/dispatcher.hpp similarity index 86% rename from include/infinicore/op/common/dispatcher.hpp rename to include/infinicore/ops/common/dispatcher.hpp index 092cc099b..d0bf93f0f 100644 --- a/include/infinicore/op/common/dispatcher.hpp +++ b/include/infinicore/ops/common/dispatcher.hpp @@ -8,19 +8,19 @@ namespace infinicore::op::common { template class OpDispatcher { public: - void registerDevice(Device::Type device_type, Fn fn, bool override_existing=true) { - if (table_[(size_t)device_type] == nullptr || override_existing){ + void registerDevice(Device::Type device_type, Fn fn, bool override_existing = true) { + if (table_[(size_t)device_type] == nullptr || override_existing) { table_[(size_t)device_type] = fn; } } - void registerDevice(std::initializer_list device_types, Fn fn, bool override_existing=true) { + void registerDevice(std::initializer_list device_types, Fn fn, bool override_existing = true) { for (auto device_type : device_types) { registerDevice(device_type, fn, override_existing); } } - void registerAll(Fn fn, bool override_existing=true) { + void registerAll(Fn fn, bool override_existing = true) { for (size_t device_type = 0; device_type < static_cast(Device::Type::COUNT); ++device_type) { registerDevice((Device::Type)device_type, fn, override_existing); } diff --git a/include/infinicore/op/common/op.hpp b/include/infinicore/ops/common/op.hpp similarity index 100% rename from include/infinicore/op/common/op.hpp rename to include/infinicore/ops/common/op.hpp diff --git a/include/infinicore/op/matmul.hpp b/include/infinicore/ops/matmul.hpp similarity index 100% rename from include/infinicore/op/matmul.hpp rename to include/infinicore/ops/matmul.hpp diff --git a/include/infinicore/op/ones.hpp b/include/infinicore/ops/ones.hpp similarity index 100% rename from include/infinicore/op/ones.hpp rename to include/infinicore/ops/ones.hpp diff --git a/include/infinicore/op/rearrange.hpp b/include/infinicore/ops/rearrange.hpp similarity index 100% rename from include/infinicore/op/rearrange.hpp rename to include/infinicore/ops/rearrange.hpp diff --git a/include/infinicore/ops/rms_norm.hpp b/include/infinicore/ops/rms_norm.hpp new file mode 100644 index 000000000..1212c446e --- /dev/null +++ b/include/infinicore/ops/rms_norm.hpp @@ -0,0 +1,16 @@ +#pragma once + +#include "../device.hpp" +#include "common/op.hpp" + +namespace infinicore::op { +class RMSNorm { +public: + using schema = void (*)(Tensor, Tensor, Tensor, float); + static void execute(Tensor y, Tensor x, Tensor weight, float epsilon = 1e-5f); + static common::OpDispatcher &dispatcher(); +}; + +Tensor rms_norm(Tensor x, Tensor weight, float epsilon = 1e-5f); +void rms_norm_(Tensor y, Tensor x, Tensor weight, float epsilon = 1e-5f); +} // namespace infinicore::op diff --git a/python/infinicore/__init__.py b/python/infinicore/__init__.py index 961806b15..4757d7f29 100644 --- a/python/infinicore/__init__.py +++ b/python/infinicore/__init__.py @@ -25,8 +25,11 @@ uint8, ) from infinicore.ntops import use_ntops +from infinicore.ops.add import add +from infinicore.ops.attention import attention from infinicore.ops.matmul import matmul from infinicore.ops.rearrange import rearrange +from infinicore.ops.rms_norm import rms_norm from infinicore.tensor import ( empty, from_blob, @@ -66,8 +69,11 @@ # `ntops` integration. "use_ntops", # Operations. + "add", + "attention", "matmul", "rearrange", + "rms_norm", "empty", "from_blob", "ones", diff --git a/python/infinicore/dtype.py b/python/infinicore/dtype.py index ec06f5d7f..a323471c2 100644 --- a/python/infinicore/dtype.py +++ b/python/infinicore/dtype.py @@ -4,7 +4,6 @@ class dtype: def __init__(self, data_type): """An internal method. Please do not use this directly.""" - self._underlying = data_type def __repr__(self): @@ -29,9 +28,31 @@ def __repr__(self): _infinicore.DataType.C128: "complex128", _infinicore.DataType.BF16: "bfloat16", } - return f"infinicore.{repr_map[self._underlying]}" + def __eq__(self, other): + """ + Compare two dtype objects for equality. + + Args: + other: The object to compare with + + Returns: + bool: True if both objects are dtype instances with the same underlying data type + """ + if not isinstance(other, dtype): + return False + return self._underlying == other._underlying + + def __hash__(self): + """ + Return a hash value for the dtype object. + + Returns: + int: Hash value based on the underlying data type + """ + return hash(self._underlying) + float32 = dtype(_infinicore.DataType.F32) float = float32 diff --git a/python/infinicore/ops/add.py b/python/infinicore/ops/add.py new file mode 100644 index 000000000..239c7c6fd --- /dev/null +++ b/python/infinicore/ops/add.py @@ -0,0 +1,9 @@ +from infinicore.lib import _infinicore +from infinicore.tensor import Tensor + + +def add(input, other, *, out=None): + if out is None: + return Tensor(_infinicore.add(input._underlying, other._underlying)) + + _infinicore.add_(out._underlying, input._underlying, other._underlying) diff --git a/python/infinicore/ops/attention.py b/python/infinicore/ops/attention.py new file mode 100644 index 000000000..70e3d913a --- /dev/null +++ b/python/infinicore/ops/attention.py @@ -0,0 +1,26 @@ +from infinicore.lib import _infinicore +from infinicore.tensor import Tensor + + +def attention(q, k, v, k_cache, v_cache, pos, *, out=None): + if out is None: + return Tensor( + _infinicore.attention( + q._underlying, + k._underlying, + v._underlying, + k_cache._underlying, + v_cache._underlying, + pos, + ) + ) + + _infinicore.attention_( + out._underlying, + q._underlying, + k._underlying, + v._underlying, + k_cache._underlying, + v_cache._underlying, + pos, + ) diff --git a/python/infinicore/ops/rms_norm.py b/python/infinicore/ops/rms_norm.py new file mode 100644 index 000000000..6e8f788ee --- /dev/null +++ b/python/infinicore/ops/rms_norm.py @@ -0,0 +1,13 @@ +from infinicore.lib import _infinicore +from infinicore.tensor import Tensor + + +def rms_norm(input, weight, epsilon=1e-5, *, out=None): + if out is None: + return Tensor( + _infinicore.rms_norm(input._underlying, weight._underlying, epsilon) + ) + + _infinicore.rms_norm_( + out._underlying, input._underlying, weight._underlying, epsilon + ) diff --git a/src/infinicore/ops/add/add.cc b/src/infinicore/ops/add/add.cc new file mode 100644 index 000000000..4962e9dd8 --- /dev/null +++ b/src/infinicore/ops/add/add.cc @@ -0,0 +1,24 @@ +#include "infinicore/ops/add.hpp" + +namespace infinicore::op { + +common::OpDispatcher &Add::dispatcher() { + static common::OpDispatcher dispatcher_; + return dispatcher_; +}; + +void Add::execute(Tensor c, Tensor a, Tensor b) { + dispatcher().lookup(context::getDevice().getType())(c, a, b); +} + +Tensor add(Tensor a, Tensor b) { + auto c = Tensor::empty(a->shape(), a->dtype(), a->device()); + add_(c, a, b); + return c; +} + +void add_(Tensor c, Tensor a, Tensor b) { + Add::execute(c, a, b); +} + +} // namespace infinicore::op diff --git a/src/infinicore/ops/add/add_infiniop.cc b/src/infinicore/ops/add/add_infiniop.cc new file mode 100644 index 000000000..e034b94de --- /dev/null +++ b/src/infinicore/ops/add/add_infiniop.cc @@ -0,0 +1,52 @@ +#include "../../utils.hpp" +#include "infinicore/common/hash.hpp" +#include "infinicore/ops/add.hpp" +#include "infinicore/ops/common/cache.hpp" +#include + +namespace infinicore::op::add_impl::infiniop { + +thread_local common::OpCache caches( + 100, // capacity + [](infiniopAddDescriptor_t &desc) { + if (desc != nullptr) { + INFINICORE_CHECK_ERROR(infiniopDestroyAddDescriptor(desc)); + desc = nullptr; + } + }); + +void calculate(Tensor c, Tensor a, Tensor b) { + size_t seed = hash_combine(c, b, a); + + auto device_type = context::getDevice().getType(); + auto device_index = context::getDevice().getIndex(); + + auto &cache = caches.getCache(device_type, device_index); + + auto desc_opt = cache.get(seed); + infiniopAddDescriptor_t desc = nullptr; + + if (!desc_opt) { + INFINICORE_CHECK_ERROR(infiniopCreateAddDescriptor( + context::getInfiniopHandle(), &desc, + c->desc(), a->desc(), b->desc())); + cache.put(seed, desc); + } else { + desc = *desc_opt; + } + + size_t workspace_size = 0; + INFINICORE_CHECK_ERROR(infiniopGetAddWorkspaceSize(desc, &workspace_size)); + std::shared_ptr workspace = context::allocateMemory(workspace_size); + + INFINICORE_CHECK_ERROR(infiniopAdd( + desc, workspace->data(), workspace_size, + c->data(), a->data(), b->data(), context::getStream())); +} + +static bool registered = []() { + Add::dispatcher().registerAll(&calculate, false); + return true; +}(); + +} // namespace infinicore::op::add_impl::infiniop diff --git a/src/infinicore/ops/attention/attention.cc b/src/infinicore/ops/attention/attention.cc new file mode 100644 index 000000000..bf4fd8203 --- /dev/null +++ b/src/infinicore/ops/attention/attention.cc @@ -0,0 +1,28 @@ +#include "infinicore/ops/attention.hpp" + +namespace infinicore::op { + +common::OpDispatcher &Attention::dispatcher() { + static common::OpDispatcher dispatcher_; + return dispatcher_; +}; + +void Attention::execute(Tensor out, Tensor q, Tensor k, Tensor v, Tensor k_cache, Tensor v_cache, size_t pos) { + dispatcher().lookup(context::getDevice().getType())(out, q, k, v, k_cache, v_cache, pos); +} + +Tensor attention(Tensor q, Tensor k, Tensor v, Tensor k_cache, Tensor v_cache, size_t pos) { + size_t n_q_head = q->shape()[0]; + size_t seq_len = q->shape()[1]; + size_t head_dim = q->shape()[2]; + Shape shape = {seq_len, n_q_head, head_dim}; + auto out = Tensor::empty(shape, q->dtype(), q->device()); + attention_(out, q, k, v, k_cache, v_cache, pos); + return out; +} + +void attention_(Tensor out, Tensor q, Tensor k, Tensor v, Tensor k_cache, Tensor v_cache, size_t pos) { + Attention::execute(out, q, k, v, k_cache, v_cache, pos); +} + +} // namespace infinicore::op diff --git a/src/infinicore/ops/attention/attention_infiniop.cc b/src/infinicore/ops/attention/attention_infiniop.cc new file mode 100644 index 000000000..816cd884c --- /dev/null +++ b/src/infinicore/ops/attention/attention_infiniop.cc @@ -0,0 +1,54 @@ +#include "../../utils.hpp" +#include "infinicore/common/hash.hpp" +#include "infinicore/ops/attention.hpp" +#include "infinicore/ops/common/cache.hpp" +#include + +namespace infinicore::op::attention_impl::infiniop { + +thread_local common::OpCache caches( + 100, // capacity + [](infiniopAttentionDescriptor_t &desc) { + if (desc != nullptr) { + INFINICORE_CHECK_ERROR(infiniopDestroyAttentionDescriptor(desc)); + desc = nullptr; + } + }); + +void calculate(Tensor out, Tensor q, Tensor k, Tensor v, Tensor k_cache, Tensor v_cache, size_t pos) { + size_t seed = hash_combine(out, q, k, v, k_cache, v_cache, pos); + + auto device_type = context::getDevice().getType(); + auto device_index = context::getDevice().getIndex(); + + auto &cache = caches.getCache(device_type, device_index); + + auto desc_opt = cache.get(seed); + infiniopAttentionDescriptor_t desc = nullptr; + + if (!desc_opt) { + INFINICORE_CHECK_ERROR(infiniopCreateAttentionDescriptor( + context::getInfiniopHandle(), &desc, + out->desc(), q->desc(), k->desc(), v->desc(), + k_cache->desc(), v_cache->desc(), pos)); + cache.put(seed, desc); + } else { + desc = *desc_opt; + } + + size_t workspace_size = 0; + INFINICORE_CHECK_ERROR(infiniopGetAttentionWorkspaceSize(desc, &workspace_size)); + std::shared_ptr workspace = context::allocateMemory(workspace_size); + + INFINICORE_CHECK_ERROR(infiniopAttention( + desc, workspace->data(), workspace_size, + out->data(), q->data(), k->data(), v->data(), + k_cache->data(), v_cache->data(), context::getStream())); +} + +static bool registered = []() { + Attention::dispatcher().registerAll(&calculate, false); + return true; +}(); + +} // namespace infinicore::op::attention_impl::infiniop diff --git a/src/infinicore/op/matmul/matmul.cc b/src/infinicore/ops/matmul/matmul.cc similarity index 94% rename from src/infinicore/op/matmul/matmul.cc rename to src/infinicore/ops/matmul/matmul.cc index 33f5a157c..d04e98268 100644 --- a/src/infinicore/op/matmul/matmul.cc +++ b/src/infinicore/ops/matmul/matmul.cc @@ -1,4 +1,4 @@ -#include "infinicore/op/matmul.hpp" +#include "infinicore/ops/matmul.hpp" namespace infinicore::op { diff --git a/src/infinicore/op/matmul/matmul_infiniop.cc b/src/infinicore/ops/matmul/matmul_infiniop.cc similarity index 85% rename from src/infinicore/op/matmul/matmul_infiniop.cc rename to src/infinicore/ops/matmul/matmul_infiniop.cc index b68a4a243..3bd69c3f8 100644 --- a/src/infinicore/op/matmul/matmul_infiniop.cc +++ b/src/infinicore/ops/matmul/matmul_infiniop.cc @@ -1,7 +1,7 @@ #include "../../utils.hpp" #include "infinicore/common/hash.hpp" -#include "infinicore/op/common/cache.hpp" -#include "infinicore/op/matmul.hpp" +#include "infinicore/ops/common/cache.hpp" +#include "infinicore/ops/matmul.hpp" #include namespace infinicore::op::matmul_impl::infiniop { @@ -27,7 +27,9 @@ void calculate(Tensor c, Tensor a, Tensor b) { infiniopGemmDescriptor_t desc = nullptr; if (!desc_opt) { - INFINICORE_CHECK_ERROR(infiniopCreateGemmDescriptor(context::getInfiniopHandle(), &desc, c->desc(), a->desc(), b->desc())); + INFINICORE_CHECK_ERROR(infiniopCreateGemmDescriptor( + context::getInfiniopHandle(), &desc, + c->desc(), a->desc(), b->desc())); cache.put(seed, desc); } else { desc = *desc_opt; diff --git a/src/infinicore/op/ones/ones.cc b/src/infinicore/ops/ones/ones.cc similarity index 87% rename from src/infinicore/op/ones/ones.cc rename to src/infinicore/ops/ones/ones.cc index 9974202e5..c28403eaf 100644 --- a/src/infinicore/op/ones/ones.cc +++ b/src/infinicore/ops/ones/ones.cc @@ -1,4 +1,4 @@ -#include "infinicore/op/ones.hpp" +#include "infinicore/ops/ones.hpp" namespace infinicore::op { diff --git a/src/infinicore/op/rearrange/rearrange.cc b/src/infinicore/ops/rearrange/rearrange.cc similarity index 93% rename from src/infinicore/op/rearrange/rearrange.cc rename to src/infinicore/ops/rearrange/rearrange.cc index 808a8e442..fe9cb4e99 100644 --- a/src/infinicore/op/rearrange/rearrange.cc +++ b/src/infinicore/ops/rearrange/rearrange.cc @@ -1,4 +1,4 @@ -#include "infinicore/op/rearrange.hpp" +#include "infinicore/ops/rearrange.hpp" namespace infinicore::op { diff --git a/src/infinicore/op/rearrange/rearrange_infiniop.cc b/src/infinicore/ops/rearrange/rearrange_infiniop.cc similarity index 94% rename from src/infinicore/op/rearrange/rearrange_infiniop.cc rename to src/infinicore/ops/rearrange/rearrange_infiniop.cc index 8b9d15162..d0a02105b 100644 --- a/src/infinicore/op/rearrange/rearrange_infiniop.cc +++ b/src/infinicore/ops/rearrange/rearrange_infiniop.cc @@ -1,7 +1,7 @@ #include "../../utils.hpp" #include "infinicore/common/hash.hpp" -#include "infinicore/op/common/cache.hpp" -#include "infinicore/op/rearrange.hpp" +#include "infinicore/ops/common/cache.hpp" +#include "infinicore/ops/rearrange.hpp" #include namespace infinicore::op::rearrange_impl::infiniop { diff --git a/src/infinicore/ops/rms_norm/rms_norm.cc b/src/infinicore/ops/rms_norm/rms_norm.cc new file mode 100644 index 000000000..613608b0f --- /dev/null +++ b/src/infinicore/ops/rms_norm/rms_norm.cc @@ -0,0 +1,24 @@ +#include "infinicore/ops/rms_norm.hpp" + +namespace infinicore::op { + +common::OpDispatcher &RMSNorm::dispatcher() { + static common::OpDispatcher dispatcher_; + return dispatcher_; +}; + +void RMSNorm::execute(Tensor y, Tensor x, Tensor weight, float epsilon) { + dispatcher().lookup(context::getDevice().getType())(y, x, weight, epsilon); +} + +Tensor rms_norm(Tensor x, Tensor weight, float epsilon) { + auto y = Tensor::empty(x->shape(), x->dtype(), x->device()); + rms_norm_(y, x, weight, epsilon); + return y; +} + +void rms_norm_(Tensor y, Tensor x, Tensor weight, float epsilon) { + RMSNorm::execute(y, x, weight, epsilon); +} + +} // namespace infinicore::op diff --git a/src/infinicore/ops/rms_norm/rms_norm_infiniop.cc b/src/infinicore/ops/rms_norm/rms_norm_infiniop.cc new file mode 100644 index 000000000..3a4cdbefa --- /dev/null +++ b/src/infinicore/ops/rms_norm/rms_norm_infiniop.cc @@ -0,0 +1,52 @@ +#include "../../utils.hpp" +#include "infinicore/common/hash.hpp" +#include "infinicore/ops/common/cache.hpp" +#include "infinicore/ops/rms_norm.hpp" +#include + +namespace infinicore::op::rms_norm_impl::infiniop { + +thread_local common::OpCache caches( + 100, // capacity + [](infiniopRMSNormDescriptor_t &desc) { + if (desc != nullptr) { + INFINICORE_CHECK_ERROR(infiniopDestroyRMSNormDescriptor(desc)); + desc = nullptr; + } + }); + +void calculate(Tensor y, Tensor x, Tensor weight, float epsilon) { + size_t seed = hash_combine(y, x, weight, epsilon); + + auto device_type = context::getDevice().getType(); + auto device_index = context::getDevice().getIndex(); + + auto &cache = caches.getCache(device_type, device_index); + + auto desc_opt = cache.get(seed); + infiniopRMSNormDescriptor_t desc = nullptr; + + if (!desc_opt) { + INFINICORE_CHECK_ERROR(infiniopCreateRMSNormDescriptor( + context::getInfiniopHandle(), &desc, + y->desc(), x->desc(), weight->desc(), epsilon)); + cache.put(seed, desc); + } else { + desc = *desc_opt; + } + + size_t workspace_size = 0; + INFINICORE_CHECK_ERROR(infiniopGetRMSNormWorkspaceSize(desc, &workspace_size)); + std::shared_ptr workspace = context::allocateMemory(workspace_size); + + INFINICORE_CHECK_ERROR(infiniopRMSNorm( + desc, workspace->data(), workspace_size, + y->data(), x->data(), weight->data(), context::getStream())); +} + +static bool registered = []() { + RMSNorm::dispatcher().registerAll(&calculate, false); + return true; +}(); + +} // namespace infinicore::op::rms_norm_impl::infiniop diff --git a/src/infinicore/pybind11/infinicore.cc b/src/infinicore/pybind11/infinicore.cc index 31b159dfd..981c727d6 100644 --- a/src/infinicore/pybind11/infinicore.cc +++ b/src/infinicore/pybind11/infinicore.cc @@ -4,7 +4,7 @@ #include "context.hpp" #include "device.hpp" #include "dtype.hpp" -#include "op.hpp" +#include "ops.hpp" #include "tensor.hpp" namespace infinicore { @@ -13,7 +13,7 @@ PYBIND11_MODULE(_infinicore, m) { context::bind(m); device::bind(m); dtype::bind(m); - op::bind(m); + ops::bind(m); tensor::bind(m); } diff --git a/src/infinicore/pybind11/op.hpp b/src/infinicore/pybind11/op.hpp deleted file mode 100644 index 04d1160df..000000000 --- a/src/infinicore/pybind11/op.hpp +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include - -#include "op/matmul.hpp" -#include "op/rearrange.hpp" - -namespace py = pybind11; - -namespace infinicore::op { - -inline void bind(py::module &m) { - bind_matmul(m); - bind_rearrange(m); -} - -} // namespace infinicore::op diff --git a/src/infinicore/pybind11/ops.hpp b/src/infinicore/pybind11/ops.hpp new file mode 100644 index 000000000..3cfef16ac --- /dev/null +++ b/src/infinicore/pybind11/ops.hpp @@ -0,0 +1,23 @@ +#pragma once + +#include + +#include "ops/add.hpp" +#include "ops/attention.hpp" +#include "ops/matmul.hpp" +#include "ops/rearrange.hpp" +#include "ops/rms_norm.hpp" + +namespace py = pybind11; + +namespace infinicore::ops { + +inline void bind(py::module &m) { + bind_add(m); + bind_attention(m); + bind_matmul(m); + bind_rearrange(m); + bind_rms_norm(m); +} + +} // namespace infinicore::ops diff --git a/src/infinicore/pybind11/ops/add.hpp b/src/infinicore/pybind11/ops/add.hpp new file mode 100644 index 000000000..fe7ac852f --- /dev/null +++ b/src/infinicore/pybind11/ops/add.hpp @@ -0,0 +1,26 @@ +#pragma once + +#include + +#include "infinicore/ops/add.hpp" + +namespace py = pybind11; + +namespace infinicore::ops { + +inline void bind_add(py::module &m) { + m.def("add", + &op::add, + py::arg("a"), + py::arg("b"), + R"doc(Addition of two tensors.)doc"); + + m.def("add_", + &op::add_, + py::arg("c"), + py::arg("a"), + py::arg("b"), + R"doc(In-place tensor addition.)doc"); +} + +} // namespace infinicore::ops diff --git a/src/infinicore/pybind11/ops/attention.hpp b/src/infinicore/pybind11/ops/attention.hpp new file mode 100644 index 000000000..4af2d5f74 --- /dev/null +++ b/src/infinicore/pybind11/ops/attention.hpp @@ -0,0 +1,56 @@ +#pragma once + +#include + +#include "infinicore/ops/attention.hpp" + +namespace py = pybind11; + +namespace infinicore::ops { + +inline void bind_attention(py::module &m) { + m.def("attention", + &op::attention, + py::arg("q"), + py::arg("k"), + py::arg("v"), + py::arg("k_cache"), + py::arg("v_cache"), + py::arg("pos"), + R"doc(Attention mechanism with KV caching. + +Args: + q: Query tensor + k: Key tensor + v: Value tensor + k_cache: Key cache tensor + v_cache: Value cache tensor + pos: Current position in the sequence + +Returns: + Output tensor from attention computation +)doc"); + + m.def("attention_", + &op::Attention::execute, + py::arg("out"), + py::arg("q"), + py::arg("k"), + py::arg("v"), + py::arg("k_cache"), + py::arg("v_cache"), + py::arg("pos"), + R"doc(In-place attention mechanism with KV caching. + +Args: + out: Output tensor + q: Query tensor + k: Key tensor + v: Value tensor + k_cache: Key cache tensor + v_cache: Value cache tensor + pos: Current position in the sequence +)doc"); +} + +} // namespace infinicore::ops diff --git a/src/infinicore/pybind11/op/matmul.hpp b/src/infinicore/pybind11/ops/matmul.hpp similarity index 82% rename from src/infinicore/pybind11/op/matmul.hpp rename to src/infinicore/pybind11/ops/matmul.hpp index ad98021e4..13591c90b 100644 --- a/src/infinicore/pybind11/op/matmul.hpp +++ b/src/infinicore/pybind11/ops/matmul.hpp @@ -2,11 +2,11 @@ #include -#include "infinicore/op/matmul.hpp" +#include "infinicore/ops/matmul.hpp" namespace py = pybind11; -namespace infinicore::op { +namespace infinicore::ops { inline void bind_matmul(py::module &m) { m.def("matmul", @@ -23,4 +23,4 @@ inline void bind_matmul(py::module &m) { R"doc(In-place matrix multiplication.)doc"); } -} // namespace infinicore::op +} // namespace infinicore::ops diff --git a/src/infinicore/pybind11/op/rearrange.hpp b/src/infinicore/pybind11/ops/rearrange.hpp similarity index 80% rename from src/infinicore/pybind11/op/rearrange.hpp rename to src/infinicore/pybind11/ops/rearrange.hpp index 94ac919bc..816b00079 100644 --- a/src/infinicore/pybind11/op/rearrange.hpp +++ b/src/infinicore/pybind11/ops/rearrange.hpp @@ -2,11 +2,11 @@ #include -#include "infinicore/op/rearrange.hpp" +#include "infinicore/ops/rearrange.hpp" namespace py = pybind11; -namespace infinicore::op { +namespace infinicore::ops { inline void bind_rearrange(py::module &m) { m.def("rearrange", @@ -21,4 +21,4 @@ inline void bind_rearrange(py::module &m) { R"doc(In-place tensor rearrangement.)doc"); } -} // namespace infinicore::op +} // namespace infinicore::ops diff --git a/src/infinicore/pybind11/ops/rms_norm.hpp b/src/infinicore/pybind11/ops/rms_norm.hpp new file mode 100644 index 000000000..1fd899c44 --- /dev/null +++ b/src/infinicore/pybind11/ops/rms_norm.hpp @@ -0,0 +1,44 @@ +#pragma once + +#include + +#include "infinicore/ops/rms_norm.hpp" + +namespace py = pybind11; + +namespace infinicore::ops { + +inline void bind_rms_norm(py::module &m) { + m.def("rms_norm", + &op::rms_norm, + py::arg("x"), + py::arg("weight"), + py::arg("epsilon") = 1e-5f, + R"doc(Root Mean Square Normalization. + +Args: + x: Input tensor + weight: Scale weights + epsilon: Small constant for numerical stability, default is 1e-5 + +Returns: + Normalized tensor with same shape as input +)doc"); + + m.def("rms_norm_", + &op::rms_norm_, + py::arg("y"), + py::arg("x"), + py::arg("weight"), + py::arg("epsilon") = 1e-5f, + R"doc(In-place Root Mean Square Normalization. + +Args: + y: Output tensor + x: Input tensor + weight: Scale weights + epsilon: Small constant for numerical stability, default is 1e-5 +)doc"); +} + +} // namespace infinicore::ops diff --git a/test/infinicore/framework/__init__.py b/test/infinicore/framework/__init__.py index 6b74d43e1..87d32f25f 100644 --- a/test/infinicore/framework/__init__.py +++ b/test/infinicore/framework/__init__.py @@ -1,31 +1,46 @@ -from .base import TestConfig, TestRunner, TestCase +# [file name]: __init__.py +# [file content begin] +from .base import TestConfig, TestRunner, TestCase, BaseOperatorTest +from .tensor import TensorSpec, TensorInitializer from .utils import ( - create_infinicore_tensor, compare_results, + create_test_comparator, debug, get_tolerance, + infinicore_tensor_from_torch, profile_operation, rearrange_tensor, + convert_infinicore_to_torch, ) from .config import get_test_devices, get_args from .devices import InfiniDeviceEnum, InfiniDeviceNames, torch_device_map from .datatypes import to_torch_dtype, to_infinicore_dtype +from .runner import GenericTestRunner +from .templates import BinaryOperatorTest, UnaryOperatorTest __all__ = [ + "TensorSpec", + "TensorInitializer", "TestConfig", "TestRunner", "TestCase", - "create_infinicore_tensor", + "BaseOperatorTest", "compare_results", + "create_test_comparator", + "convert_infinicore_to_torch", "debug", + "get_args", + "get_test_devices", "get_tolerance", + "infinicore_tensor_from_torch", "profile_operation", "rearrange_tensor", - "get_test_devices", - "get_args", "InfiniDeviceEnum", "InfiniDeviceNames", "torch_device_map", "to_torch_dtype", "to_infinicore_dtype", + "GenericTestRunner", + "BinaryOperatorTest", + "UnaryOperatorTest", ] diff --git a/test/infinicore/framework/base.py b/test/infinicore/framework/base.py index 174137efe..1ee4a5294 100644 --- a/test/infinicore/framework/base.py +++ b/test/infinicore/framework/base.py @@ -1,18 +1,90 @@ import torch import infinicore -from .devices import InfiniDeviceNames -from .utils import synchronize_device + +from abc import ABC, abstractmethod +from typing import List, Dict, Any, Tuple, Union, Callable, Optional + +from .datatypes import to_torch_dtype, to_infinicore_dtype +from .devices import InfiniDeviceNames, torch_device_map +from .tensor import TensorSpec, TensorInitializer +from .utils import ( + create_test_comparator, + infinicore_tensor_from_torch, + profile_operation, + rearrange_tensor, + synchronize_device, +) class TestCase: - """Base test case class""" + """Test case""" + + OUT_OF_PLACE = "out_of_place" + IN_PLACE = "in_place" + BOTH = "both" + + def __init__(self, operation_mode, inputs, output=None, **kwargs): + if operation_mode not in [self.IN_PLACE, self.OUT_OF_PLACE, self.BOTH]: + raise ValueError(f"Invalid operation_mode: {operation_mode}") + + if operation_mode == self.IN_PLACE and output is None: + raise ValueError("IN_PLACE mode requires output specification") + + self.operation_mode = operation_mode + self.inputs = [] + + for inp in inputs: + if isinstance(inp, (list, tuple)): + self.inputs.append(TensorSpec.from_tensor(inp)) + elif isinstance(inp, TensorSpec): + self.inputs.append(inp) + else: + self.inputs.append(inp) + + if isinstance(output, (list, tuple)): + self.output = TensorSpec.from_tensor(output) + else: + self.output = output - def __init__(self, *args, **kwargs): - self.args = args self.kwargs = kwargs + self.description = kwargs.pop("description", "") def __str__(self): - return f"TestCase{self.args}" + mode_str = self.operation_mode.upper() + input_strs = [] + for inp in self.inputs: + if hasattr(inp, "is_scalar") and inp.is_scalar: + dtype_str = f", dtype={inp.dtype}" if inp.dtype else "" + input_strs.append(f"scalar({inp.value}{dtype_str})") + elif hasattr(inp, "shape"): + dtype_str = f", dtype={inp.dtype}" if inp.dtype else "" + init_str = ( + f", init={inp.init_mode}" + if inp.init_mode != TensorInitializer.RANDOM + else "" + ) + if hasattr(inp, "is_contiguous") and not inp.is_contiguous: + input_strs.append(f"strided_tensor{inp.shape}{dtype_str}{init_str}") + else: + input_strs.append(f"tensor{inp.shape}{dtype_str}{init_str}") + else: + input_strs.append(str(inp)) + + base_str = f"TestCase(mode={mode_str}, inputs=[{', '.join(input_strs)}]" + if self.output: + dtype_str = f", dtype={self.output.dtype}" if self.output.dtype else "" + init_str = ( + f", init={self.output.init_mode}" + if self.output.init_mode != TensorInitializer.RANDOM + else "" + ) + base_str += f", output=tensor{self.output.shape}{dtype_str}{init_str}" + if self.kwargs: + base_str += f", kwargs={self.kwargs}" + if self.description: + base_str += f", desc='{self.description}'" + base_str += ")" + return base_str class TestConfig: @@ -26,6 +98,7 @@ def __init__( bench=False, num_prerun=10, num_iterations=1000, + dtype_combinations=None, ): self.tensor_dtypes = tensor_dtypes self.tolerance_map = tolerance_map @@ -33,6 +106,7 @@ def __init__( self.bench = bench self.num_prerun = num_prerun self.num_iterations = num_iterations + self.dtype_combinations = dtype_combinations class TestRunner: @@ -41,45 +115,61 @@ class TestRunner: def __init__(self, test_cases, test_config): self.test_cases = test_cases self.config = test_config - self.failed_tests = [] # Track failures + self.failed_tests = [] - def run_tests(self, devices, test_func): - """Run tests and track failures""" + def run_tests(self, devices, test_func, test_type="Test"): for device in devices: print(f"\n{'='*60}") - print(f"Testing on {InfiniDeviceNames[device]}") + print(f"Testing {test_type} on {InfiniDeviceNames[device]}") print(f"{'='*60}") - # filter unsupported data types tensor_dtypes = self._filter_tensor_dtypes_by_device( device, self.config.tensor_dtypes ) for test_case in self.test_cases: - for dtype in tensor_dtypes: - try: - test_func(device, test_case, dtype, self.config) - print(f"✓ {test_case} with {dtype} passed") - except Exception as e: - error_msg = f"{test_case} with {dtype} on {InfiniDeviceNames[device]}: {e}" - print(f"✗ {error_msg}") - self.failed_tests.append(error_msg) - if self.config.debug: - raise - - # Return whether any tests failed + if self.config.dtype_combinations: + for dtype_combo in self.config.dtype_combinations: + try: + test_func(device, test_case, dtype_combo, self.config) + combo_str = self._format_dtype_combo(dtype_combo) + print(f"✓ {test_case} with {combo_str} passed") + except Exception as e: + combo_str = self._format_dtype_combo(dtype_combo) + error_msg = f"{test_case} with {combo_str} on {InfiniDeviceNames[device]}: {e}" + print(f"✗ {error_msg}") + self.failed_tests.append(error_msg) + if self.config.debug: + raise + else: + for dtype in tensor_dtypes: + try: + test_func(device, test_case, dtype, self.config) + print(f"✓ {test_case} with {dtype} passed") + except Exception as e: + error_msg = f"{test_case} with {dtype} on {InfiniDeviceNames[device]}: {e}" + print(f"✗ {error_msg}") + self.failed_tests.append(error_msg) + if self.config.debug: + raise + return len(self.failed_tests) == 0 + def _format_dtype_combo(self, dtype_combo): + if isinstance(dtype_combo, dict): + return f"dtypes({dtype_combo})" + elif isinstance(dtype_combo, (list, tuple)): + return f"dtypes{tuple(dtype_combo)}" + else: + return str(dtype_combo) + def _filter_tensor_dtypes_by_device(self, device, tensor_dtypes): - """Filter data types based on device""" if device in (): - # Filter out unsupported data types on specified devices return [dt for dt in tensor_dtypes if dt != infinicore.bfloat16] else: return tensor_dtypes def print_summary(self): - """Print test summary""" if self.failed_tests: print(f"\n\033[91m{len(self.failed_tests)} tests failed:\033[0m") for failure in self.failed_tests: @@ -88,3 +178,246 @@ def print_summary(self): else: print("\n\033[92mAll tests passed!\033[0m") return True + + +class BaseOperatorTest(ABC): + """Base operator test""" + + def __init__(self, operator_name): + self.operator_name = operator_name + self.test_cases = self.get_test_cases() + self.tensor_dtypes = self.get_tensor_dtypes() + self.tolerance_map = self.get_tolerance_map() + self.dtype_combinations = self.get_dtype_combinations() + + @abstractmethod + def get_test_cases(self): + """Return list of TestCase objects""" + pass + + @abstractmethod + def get_tensor_dtypes(self): + """Return supported data types""" + pass + + @abstractmethod + def get_tolerance_map(self): + """Return tolerance configuration""" + pass + + def get_dtype_combinations(self): + """Return dtype combinations for mixed dtype tests""" + return None + + @abstractmethod + def torch_operator(self, *inputs, out=None, **kwargs): + """Unified PyTorch operator function""" + pass + + @abstractmethod + def infinicore_operator(self, *inputs, out=None, **kwargs): + """Unified Infinicore operator function""" + pass + + def create_strided_tensor( + self, shape, strides, dtype, device, init_mode=TensorInitializer.RANDOM + ): + """Create a non-contiguous tensor with specific strides""" + spec = TensorSpec.from_strided_tensor(shape, strides, dtype, init_mode) + return spec.create_torch_tensor(device, dtype) + + def prepare_inputs(self, test_case, device, dtype_config): + """Prepare input data""" + inputs = [] + + for i, input_spec in enumerate(test_case.inputs): + if isinstance(input_spec, TensorSpec): + if input_spec.is_scalar: + inputs.append(input_spec.value) + else: + tensor = input_spec.create_torch_tensor(device, dtype_config, i) + inputs.append(tensor) + else: + inputs.append(input_spec) + + return inputs, test_case.kwargs + + def get_output_dtype(self, test_case, dtype_config, torch_result=None): + """Determine output dtype - returns infinicore dtype, not torch dtype""" + if test_case.output and test_case.output.dtype is not None: + return test_case.output.dtype + elif isinstance(dtype_config, dict) and "output" in dtype_config: + return dtype_config["output"] + elif torch_result is not None: + return to_infinicore_dtype(torch_result.dtype) + else: + if isinstance(dtype_config, (list, tuple)): + return dtype_config[0] + else: + return dtype_config + + def run_test(self, device, test_case, dtype_config, config): + """Unified test execution flow""" + device_str = torch_device_map[device] + + if test_case.operation_mode == TestCase.BOTH: + out_of_place_case = TestCase( + TestCase.OUT_OF_PLACE, + test_case.inputs, + test_case.output, + **test_case.kwargs, + ) + self._run_single_test( + device, out_of_place_case, dtype_config, config, "OUT_OF_PLACE" + ) + + if test_case.output is not None: + in_place_case = TestCase( + TestCase.IN_PLACE, + test_case.inputs, + test_case.output, + **test_case.kwargs, + ) + self._run_single_test( + device, in_place_case, dtype_config, config, "IN_PLACE" + ) + return + + self._run_single_test( + device, test_case, dtype_config, config, test_case.operation_mode.upper() + ) + + def _run_single_test(self, device, test_case, dtype_config, config, mode_name): + """Run a single test with specified operation mode""" + device_str = torch_device_map[device] + + inputs, kwargs = self.prepare_inputs(test_case, device, dtype_config) + + infini_inputs = [] + for inp in inputs: + if isinstance(inp, torch.Tensor): + infini_tensor = infinicore_tensor_from_torch(inp) + infini_inputs.append(infini_tensor) + else: + infini_inputs.append(inp) + + if test_case.operation_mode == TestCase.OUT_OF_PLACE: + + def torch_op(): + return self.torch_operator(*inputs, **kwargs) + + torch_result = torch_op() + + if ( + isinstance(torch_result, torch.Tensor) + and not torch_result.is_contiguous() + ): + torch_result = torch_result.contiguous() + + def infini_op(): + return self.infinicore_operator(*infini_inputs, **kwargs) + + infini_result = infini_op() + + # Get comparison dtype (infinicore dtype) + comparison_dtype = self.get_output_dtype( + test_case, dtype_config, torch_result + ) + + compare_fn = create_test_comparator( + config, comparison_dtype, mode_name=f"{self.operator_name} {mode_name}" + ) + is_valid = compare_fn(infini_result, torch_result) + assert is_valid, f"{self.operator_name} {mode_name} test failed" + + if config.bench: + profile_operation( + f"PyTorch {self.operator_name} {mode_name}", + torch_op, + device_str, + config.num_prerun, + config.num_iterations, + ) + profile_operation( + f"Infinicore {self.operator_name} {mode_name}", + infini_op, + device_str, + config.num_prerun, + config.num_iterations, + ) + + else: + if not test_case.output: + raise ValueError("IN_PLACE test requires output specification") + + # Get output dtype and create output tensor + output_dtype = self.get_output_dtype(test_case, dtype_config) + output_shape = test_case.output.shape + + # Use TensorSpec to create output tensor with specified initialization mode + if test_case.output.is_contiguous or test_case.output.strides is None: + output_spec = TensorSpec.from_tensor( + output_shape, output_dtype, init_mode=test_case.output.init_mode + ) + else: + output_spec = TensorSpec.from_strided_tensor( + output_shape, + test_case.output.strides, + output_dtype, + init_mode=test_case.output.init_mode, + ) + + torch_output = output_spec.create_torch_tensor(device, output_dtype) + + # For non-contiguous tensors, we need to ensure zeros initialization + if ( + not test_case.output.is_contiguous + and test_case.output.strides is not None + ): + torch_output.zero_() + + def torch_op_inplace(): + self.torch_operator(*inputs, out=torch_output, **kwargs) + + torch_op_inplace() + + # Create infinicore output tensor + torch_dummy = torch.zeros( + output_shape, dtype=to_torch_dtype(output_dtype), device=device_str + ) + if ( + not test_case.output.is_contiguous + and not test_case.output.strides is None + ): + rearrange_tensor(torch_dummy, list(torch_output.stride())) + infini_output = infinicore_tensor_from_torch(torch_dummy) + + def infini_op_inplace(): + self.infinicore_operator(*infini_inputs, out=infini_output, **kwargs) + + infini_op_inplace() + + comparison_dtype = self.get_output_dtype( + test_case, dtype_config, torch_output + ) + compare_fn = create_test_comparator( + config, comparison_dtype, mode_name=f"{self.operator_name} {mode_name}" + ) + is_valid = compare_fn(infini_output, torch_output) + assert is_valid, f"{self.operator_name} {mode_name} test failed" + + if config.bench: + profile_operation( + f"PyTorch {self.operator_name} {mode_name}", + torch_op_inplace, + device_str, + config.num_prerun, + config.num_iterations, + ) + profile_operation( + f"Infinicore {self.operator_name} {mode_name}", + infini_op_inplace, + device_str, + config.num_prerun, + config.num_iterations, + ) diff --git a/test/infinicore/framework/runner.py b/test/infinicore/framework/runner.py new file mode 100644 index 000000000..eb364daad --- /dev/null +++ b/test/infinicore/framework/runner.py @@ -0,0 +1,49 @@ +""" +Generic test runner that handles the common execution flow for all operators +""" + +import sys +from . import TestConfig, TestRunner, get_args, get_test_devices + + +class GenericTestRunner: + """Generic test runner that handles the common execution flow""" + + def __init__(self, operator_test_class): + """ + Args: + operator_test_class: A class that implements BaseOperatorTest interface + """ + self.operator_test = operator_test_class() + self.args = get_args() + + def run(self): + """Execute the complete test suite""" + config = TestConfig( + tensor_dtypes=self.operator_test.tensor_dtypes, + tolerance_map=self.operator_test.tolerance_map, + debug=self.args.debug, + bench=self.args.bench, + num_prerun=self.args.num_prerun, + num_iterations=self.args.num_iterations, + dtype_combinations=self.operator_test.dtype_combinations, + ) + + runner = TestRunner(self.operator_test.test_cases, config) + devices = get_test_devices(self.args) + + # Run unified tests + all_passed = runner.run_tests( + devices, self.operator_test.run_test, self.operator_test.operator_name + ) + + # Print summary + summary_passed = runner.print_summary() + all_passed = all_passed and summary_passed + + return all_passed + + def run_and_exit(self): + """Run tests and exit with appropriate status code""" + success = self.run() + sys.exit(0 if success else 1) diff --git a/test/infinicore/framework/templates.py b/test/infinicore/framework/templates.py new file mode 100644 index 000000000..367c6e108 --- /dev/null +++ b/test/infinicore/framework/templates.py @@ -0,0 +1,108 @@ +""" +Templates for common operator patterns to minimize code duplication + +Available configuration methods in BaseOperatorTest: + +1. get_test_cases() -> List[TestCase] + - Define input/output shapes, strides, and operation modes + - Operation modes: TestCase.OUT_OF_PLACE, TestCase.IN_PLACE, TestCase.BOTH + +2. get_tensor_dtypes() -> List[infinicore.dtype] + - Define supported data types for single-dtype tests + - Used when dtype_combinations is None + +3. get_tolerance_map() -> Dict[infinicore.dtype, Dict[str, float]] + - Set tolerance (atol, rtol) for each data type + - Example: {infinicore.float16: {"atol": 1e-3, "rtol": 1e-2}} + +4. get_dtype_combinations() -> Optional[List[Dict]] + - Define mixed dtype configurations for multi-dtype tests + - Return None for single-dtype tests + +5. torch_operator(*inputs, out=None, **kwargs) -> torch.Tensor + - Implement PyTorch reference implementation + +6. infinicore_operator(*inputs, out=None, **kwargs) -> infinicore.Tensor + - Implement Infinicore operator implementation + +New Tensor Initialization Modes: +- TensorInitializer.RANDOM (default): Random values using torch.rand +- TensorInitializer.ZEROS: All zeros using torch.zeros +- TensorInitializer.ONES: All ones using torch.ones +- TensorInitializer.RANDINT: Random integers using torch.randint +- TensorInitializer.MANUAL: Use a pre-existing tensor with shape/strides validation +- TensorInitializer.BINARY: Use a pre-existing tensor with shape validation only + +Usage examples in TestCase creation: +- Basic: TensorSpec.from_tensor(shape) +- With initialization: TensorSpec.from_tensor(shape, init_mode=TensorInitializer.ZEROS) +- Strided with custom init: TensorSpec.from_strided_tensor(shape, strides, init_mode=TensorInitializer.ONES) +""" + +import torch +import infinicore +from .base import BaseOperatorTest +from .tensor import TensorSpec, TensorInitializer + + +class BinaryOperatorTest(BaseOperatorTest): + """Template for binary operators (matmul, add, mul, etc.)""" + + def __init__(self, operator_name, test_cases, tensor_dtypes, tolerance_map): + self._operator_name = operator_name + self._test_cases = test_cases + self._tensor_dtypes = tensor_dtypes + self._tolerance_map = tolerance_map + super().__init__(operator_name) + + def get_test_cases(self): + return self._test_cases + + def get_tensor_dtypes(self): + return self._tensor_dtypes + + def get_tolerance_map(self): + return self._tolerance_map + + def torch_operator(self, *inputs, **kwargs): + """Generic torch operator dispatch""" + # Support both functional and method calls + if hasattr(torch, self._operator_name): + op = getattr(torch, self._operator_name) + else: + # Fallback to common operator mappings + op_mapping = { + "matmul": torch.matmul, + "add": torch.add, + "mul": torch.mul, + "sub": torch.sub, + "div": torch.div, + } + op = op_mapping.get(self._operator_name) + if op is None: + raise NotImplementedError( + f"Torch operator {self._operator_name} not implemented" + ) + + return op(*inputs, **kwargs) + + def infinicore_operator(self, *inputs, **kwargs): + """Generic infinicore operator dispatch""" + op = getattr(infinicore, self._operator_name) + return op(*inputs, **kwargs) + + +class UnaryOperatorTest(BinaryOperatorTest): + """Template for unary operators (exp, log, sin, etc.)""" + + def torch_operator(self, *inputs, **kwargs): + # For unary operators, we only use the first input + if hasattr(torch, self._operator_name): + op = getattr(torch, self._operator_name) + return op(inputs[0], **kwargs) + else: + return super().torch_operator(*inputs, **kwargs) + + def infinicore_operator(self, *inputs, **kwargs): + op = getattr(infinicore, self._operator_name) + return op(inputs[0], **kwargs) diff --git a/test/infinicore/framework/tensor.py b/test/infinicore/framework/tensor.py new file mode 100644 index 000000000..6aa5ca7b4 --- /dev/null +++ b/test/infinicore/framework/tensor.py @@ -0,0 +1,416 @@ +import torch +from pathlib import Path +from .datatypes import to_torch_dtype +from .devices import torch_device_map + + +class TensorInitializer: + """Tensor data initializer with multiple modes""" + + RANDOM = "random" + ZEROS = "zeros" + ONES = "ones" + RANDINT = "randint" + MANUAL = "manual" + BINARY = "binary" + FROM_FILE = "from_file" + + @staticmethod + def create_tensor( + shape, dtype, device, mode=RANDOM, strides=None, set_tensor=None, file_path=None + ): + """ + Create a torch tensor with specified initialization mode + + Args: + shape: Tensor shape + dtype: infinicore dtype + device: InfiniDeviceEnum + mode: Initialization mode + strides: Optional strides for strided tensors + set_tensor: Pre-existing tensor for manual/binary mode + file_path: Path to file for FROM_FILE mode + + Returns: + torch.Tensor: Initialized tensor + """ + # Convert InfiniDeviceEnum to torch device string + torch_device_str = torch_device_map[device] + torch_dtype = to_torch_dtype(dtype) + + # Handle strided tensors - calculate required storage size + if strides is not None: + # Calculate the required storage size for strided tensor + storage_size = 0 + for i in range(len(shape)): + if shape[i] > 0: + storage_size += (shape[i] - 1) * abs(strides[i]) + storage_size += 1 # Add 1 for the base element + + # Create base storage with sufficient size + if mode == TensorInitializer.RANDOM: + base_tensor = torch.rand( + storage_size, dtype=torch_dtype, device=torch_device_str + ) + elif mode == TensorInitializer.ZEROS: + base_tensor = torch.zeros( + storage_size, dtype=torch_dtype, device=torch_device_str + ) + elif mode == TensorInitializer.ONES: + base_tensor = torch.ones( + storage_size, dtype=torch_dtype, device=torch_device_str + ) + elif mode == TensorInitializer.RANDINT: + base_tensor = torch.randint( + -2000000000, + 2000000000, + (storage_size,), + dtype=torch_dtype, + device=torch_device_str, + ) + elif mode == TensorInitializer.MANUAL: + assert set_tensor is not None, "Manual mode requires set_tensor" + base_tensor = set_tensor.to(torch_dtype).to(torch_device_str) + elif mode == TensorInitializer.BINARY: + assert set_tensor is not None, "Binary mode requires set_tensor" + base_tensor = set_tensor.to(torch_dtype).to(torch_device_str) + elif mode == TensorInitializer.FROM_FILE: + base_tensor = TensorInitializer._load_from_file( + file_path, storage_size, torch_dtype, torch_device_str + ) + else: + raise ValueError(f"Unsupported initialization mode: {mode}") + + # Create strided view + tensor = torch.as_strided(base_tensor, shape, strides) + else: + # Contiguous tensor + if mode == TensorInitializer.RANDOM: + tensor = torch.rand(shape, dtype=torch_dtype, device=torch_device_str) + elif mode == TensorInitializer.ZEROS: + tensor = torch.zeros(shape, dtype=torch_dtype, device=torch_device_str) + elif mode == TensorInitializer.ONES: + tensor = torch.ones(shape, dtype=torch_dtype, device=torch_device_str) + elif mode == TensorInitializer.RANDINT: + tensor = torch.randint( + -2000000000, + 2000000000, + shape, + dtype=torch_dtype, + device=torch_device_str, + ) + elif mode == TensorInitializer.MANUAL: + assert set_tensor is not None, "Manual mode requires set_tensor" + assert shape == list(set_tensor.shape), "Shape mismatch in manual mode" + tensor = set_tensor.to(torch_dtype).to(torch_device_str) + elif mode == TensorInitializer.BINARY: + assert set_tensor is not None, "Binary mode requires set_tensor" + assert shape == list(set_tensor.shape), "Shape mismatch in binary mode" + tensor = set_tensor.to(torch_dtype).to(torch_device_str) + elif mode == TensorInitializer.FROM_FILE: + tensor = TensorInitializer._load_from_file( + file_path, shape, torch_dtype, torch_device_str + ) + else: + raise ValueError(f"Unsupported initialization mode: {mode}") + + return tensor + + @staticmethod + def _load_from_file(file_path, shape_or_size, torch_dtype, torch_device_str): + """ + Load tensor data from file using PyTorch's native methods + + Args: + file_path: Path to the file + shape_or_size: Tensor shape for contiguous or size for strided + torch_dtype: Target torch dtype + torch_device_str: Target device string + + Returns: + torch.Tensor: Tensor with data loaded from file + """ + if file_path is None: + raise ValueError("FROM_FILE mode requires file_path") + + file_path = Path(file_path) + if not file_path.exists(): + raise FileNotFoundError(f"File not found: {file_path}") + + # Determine file type and load accordingly + file_extension = file_path.suffix.lower() + + if file_extension in [".pt", ".pth"]: + # PyTorch native format + tensor = torch.load(file_path, map_location=torch_device_str) + + elif file_extension in [".bin", ".dat", ".raw"]: + # Raw binary format - we need to know the expected shape + tensor = TensorInitializer._load_binary_file( + file_path, shape_or_size, torch_dtype, torch_device_str + ) + + elif file_extension in [".npy"]: + # NumPy format - fallback to numpy if needed + try: + import numpy as np + + numpy_array = np.load(file_path) + tensor = ( + torch.from_numpy(numpy_array).to(torch_dtype).to(torch_device_str) + ) + except ImportError: + raise ImportError("NumPy is required to load .npy files") + + else: + # Try to load as PyTorch format first, then fallback to binary + try: + tensor = torch.load(file_path, map_location=torch_device_str) + except: + # Fallback to binary loading + tensor = TensorInitializer._load_binary_file( + file_path, shape_or_size, torch_dtype, torch_device_str + ) + + # Ensure correct dtype and device + tensor = tensor.to(torch_dtype).to(torch_device_str) + + # Validate shape/size + if isinstance(shape_or_size, (list, tuple)): + # Contiguous tensor - check shape + if list(tensor.shape) != list(shape_or_size): + raise ValueError( + f"Tensor shape mismatch: expected {shape_or_size}, got {tensor.shape}" + ) + else: + # Strided tensor - check total size + if tensor.numel() != shape_or_size: + raise ValueError( + f"Tensor size mismatch: expected {shape_or_size} elements, got {tensor.numel()}" + ) + + return tensor + + @staticmethod + def _load_binary_file(file_path, shape_or_size, torch_dtype, torch_device_str): + """ + Load tensor from raw binary file + + Args: + file_path: Path to binary file + shape_or_size: Expected shape or size + torch_dtype: Target dtype + torch_device_str: Target device + + Returns: + torch.Tensor: Loaded tensor + """ + # Read binary data + with open(file_path, "rb") as f: + binary_data = f.read() + + # Create tensor from buffer + if isinstance(shape_or_size, (list, tuple)): + # Contiguous tensor with known shape + tensor = torch.frombuffer(binary_data, dtype=torch_dtype).reshape( + shape_or_size + ) + else: + # Strided tensor - just 1D buffer + tensor = torch.frombuffer(binary_data, dtype=torch_dtype) + + return tensor.to(torch_device_str) + + @staticmethod + def save_to_file(tensor, file_path, format="auto"): + """ + Save tensor data to file using PyTorch's native methods + + Args: + tensor: torch.Tensor to save + file_path: Path to save the file + format: File format ('auto', 'torch', 'binary', 'numpy') + """ + file_path = Path(file_path) + + if format == "auto": + # Determine format from file extension + file_extension = file_path.suffix.lower() + if file_extension in [".pt", ".pth"]: + format = "torch" + elif file_extension in [".npy"]: + format = "numpy" + else: + format = "binary" + + if format == "torch": + # PyTorch native format (preserves metadata) + torch.save(tensor, file_path) + + elif format == "binary": + # Raw binary format + with open(file_path, "wb") as f: + f.write(tensor.cpu().numpy().tobytes()) + + elif format == "numpy": + # NumPy format + try: + import numpy as np + + np.save(file_path, tensor.cpu().numpy()) + except ImportError: + raise ImportError("NumPy is required to save .npy files") + + else: + raise ValueError(f"Unsupported format: {format}") + + print( + f"Tensor saved to {file_path} (shape: {tensor.shape}, dtype: {tensor.dtype}, format: {format})" + ) + + @staticmethod + def list_supported_formats(): + """Return list of supported file formats""" + return { + "torch": [".pt", ".pth"], # PyTorch native format + "binary": [".bin", ".dat", ".raw"], # Raw binary + "numpy": [".npy"], # NumPy format + } + + +class TensorSpec: + """Tensor specification supporting various input types and per-tensor dtype""" + + def __init__( + self, + shape=None, + dtype=None, + strides=None, + value=None, + is_scalar=False, + is_contiguous=True, + init_mode=TensorInitializer.RANDOM, # Default to random initialization + custom_tensor=None, # For manual/binary mode + file_path=None, # For FROM_FILE mode + file_format=None, # Optional file format hint + ): + self.shape = shape + self.dtype = dtype + self.strides = strides + self.value = value + self.is_scalar = is_scalar + self.is_contiguous = is_contiguous + self.init_mode = init_mode + self.custom_tensor = custom_tensor + self.file_path = file_path + self.file_format = file_format + + @classmethod + def from_tensor( + cls, + shape, + dtype=None, + strides=None, + is_contiguous=True, + init_mode=TensorInitializer.RANDOM, + custom_tensor=None, + file_path=None, + ): + return cls( + shape=shape, + dtype=dtype, + strides=strides, + is_scalar=False, + is_contiguous=is_contiguous, + init_mode=init_mode, + custom_tensor=custom_tensor, + file_path=file_path, + ) + + @classmethod + def from_scalar(cls, value, dtype=None): + return cls(value=value, dtype=dtype, is_scalar=True) + + @classmethod + def from_strided_tensor( + cls, + shape, + strides, + dtype=None, + init_mode=TensorInitializer.RANDOM, + custom_tensor=None, + file_path=None, + ): + return cls( + shape=shape, + dtype=dtype, + strides=strides, + is_scalar=False, + is_contiguous=False, + init_mode=init_mode, + custom_tensor=custom_tensor, + file_path=file_path, + ) + + @classmethod + def from_file( + cls, + file_path, + shape, + dtype=None, + strides=None, + is_contiguous=True, + file_format=None, + ): + """ + Create TensorSpec that loads data from file + + Args: + file_path: Path to file + shape: Tensor shape + dtype: infinicore dtype (inferred from file if None) + strides: Optional strides for strided tensors + is_contiguous: Whether tensor is contiguous + file_format: Optional file format hint + + Returns: + TensorSpec: Configured for file loading + """ + return cls( + shape=shape, + dtype=dtype, + strides=strides, + is_scalar=False, + is_contiguous=is_contiguous, + init_mode=TensorInitializer.FROM_FILE, + file_path=file_path, + file_format=file_format, + ) + + def create_torch_tensor(self, device, dtype_config, tensor_index=0): + """Create a torch tensor based on this specification""" + if self.is_scalar: + return self.value + + # Determine dtype - ensure we're using infinicore dtype, not torch dtype + if self.dtype is not None: + tensor_dtype = self.dtype + elif isinstance(dtype_config, dict) and f"input_{tensor_index}" in dtype_config: + tensor_dtype = dtype_config[f"input_{tensor_index}"] + elif isinstance(dtype_config, (list, tuple)) and tensor_index < len( + dtype_config + ): + tensor_dtype = dtype_config[tensor_index] + else: + tensor_dtype = dtype_config + + # Create tensor using the specified initialization mode + return TensorInitializer.create_tensor( + shape=self.shape, + dtype=tensor_dtype, + device=device, + mode=self.init_mode, + strides=self.strides, + set_tensor=self.custom_tensor, + file_path=self.file_path, + ) diff --git a/test/infinicore/framework/utils.py b/test/infinicore/framework/utils.py index ed0cc4f71..7e6a138bb 100644 --- a/test/infinicore/framework/utils.py +++ b/test/infinicore/framework/utils.py @@ -4,18 +4,6 @@ from .datatypes import to_infinicore_dtype, to_torch_dtype -def create_infinicore_tensor(torch_tensor, device_str): - """Create infinicore tensor from PyTorch tensor""" - infini_device = infinicore.device(device_str, 0) - - return infinicore.from_blob( - torch_tensor.data_ptr(), - list(torch_tensor.shape), - dtype=to_infinicore_dtype(torch_tensor.dtype), - device=infini_device, - ) - - def synchronize_device(torch_device): """Device synchronization""" if torch_device == "cuda": @@ -117,7 +105,6 @@ def add_color(text, color_code): f"delta: {add_color(delta_str, 33)}" ) - print(add_color(" INFO:", 35)) print(f" - Actual dtype: {actual.dtype}") print(f" - Desired dtype: {expected.dtype}") print(f" - Atol: {atol}") @@ -149,44 +136,103 @@ def get_tolerance(tolerance_map, tensor_dtype, default_atol=0, default_rtol=1e-3 return tolerance["atol"], tolerance["rtol"] -def compare_results( - infini_result, torch_result, dtype, config, device_str, tolerance_map=None -): +def infinicore_tensor_from_torch(torch_tensor): + infini_device = infinicore.device(torch_tensor.device.type, 0) + if torch_tensor.is_contiguous(): + return infinicore.from_blob( + torch_tensor.data_ptr(), + list(torch_tensor.shape), + dtype=to_infinicore_dtype(torch_tensor.dtype), + device=infini_device, + ) + else: + return infinicore.strided_from_blob( + torch_tensor.data_ptr(), + list(torch_tensor.shape), + list(torch_tensor.stride()), + dtype=to_infinicore_dtype(torch_tensor.dtype), + device=infini_device, + ) + + +def convert_infinicore_to_torch(infini_result, torch_reference): """ - Compare infinicore result with PyTorch reference result + Convert infinicore tensor to PyTorch tensor for comparison Args: infini_result: infinicore tensor result - torch_result: PyTorch tensor reference result + torch_reference: PyTorch tensor reference (for shape and device) dtype: infinicore data type - config: test config device_str: torch device string - device: device enum - tolerance_map: optional tolerance map (defaults to config's tolerance_map) Returns: - bool: True if results match within tolerance + torch.Tensor: PyTorch tensor with infinicore data """ - # Convert infinicore result to PyTorch tensor for comparison torch_result_from_infini = torch.zeros( - torch_result.shape, dtype=to_torch_dtype(dtype), device=device_str + torch_reference.shape, + dtype=to_torch_dtype(infini_result.dtype), + device=infini_result.device.type, ) - temp_tensor = create_infinicore_tensor(torch_result_from_infini, device_str) + temp_tensor = infinicore_tensor_from_torch(torch_result_from_infini) temp_tensor.copy_(infini_result) + return torch_result_from_infini - # Retrieve tolerance - use provided map or config's map - if tolerance_map is None: - tolerance_map = config.tolerance_map - atol, rtol = get_tolerance(tolerance_map, dtype) + +def compare_results( + infini_result, torch_result, atol=1e-5, rtol=1e-5, debug_mode=False +): + """ + Generic function to compare infinicore result with PyTorch reference result + + Args: + infini_result: infinicore tensor result + torch_result: PyTorch tensor reference result + atol: absolute tolerance + rtol: relative tolerance + debug_mode: whether to enable debug output + + Returns: + bool: True if results match within tolerance + """ + # Convert infinicore result to PyTorch tensor for comparison + torch_result_from_infini = convert_infinicore_to_torch(infini_result, torch_result) # Debug mode: detailed comparison - if config.debug: + if debug_mode: debug(torch_result_from_infini, torch_result, atol=atol, rtol=rtol) # Check if results match within tolerance return torch.allclose(torch_result_from_infini, torch_result, atol=atol, rtol=rtol) +def create_test_comparator(config, dtype, tolerance_map=None, mode_name=""): + """ + Create a test-specific comparison function that handles test configuration + + Args: + config: test configuration + dtype: infinicore data type + tolerance_map: optional tolerance map (defaults to config's tolerance_map) + mode_name: operation mode name for debug output + + Returns: + callable: function that takes (infini_result, torch_result) and returns bool + """ + if tolerance_map is None: + tolerance_map = config.tolerance_map + + atol, rtol = get_tolerance(tolerance_map, dtype) + + def compare_test_results(infini_result, torch_result): + if config.debug and mode_name: + print(f"\n\033[94mDEBUG INFO - {mode_name}:\033[0m") + return compare_results( + infini_result, torch_result, atol=atol, rtol=rtol, debug_mode=config.debug + ) + + return compare_test_results + + def rearrange_tensor(tensor, new_strides): """ Given a PyTorch tensor and a list of new strides, return a new PyTorch tensor with the given strides. diff --git a/test/infinicore/op/matmul.py b/test/infinicore/op/matmul.py deleted file mode 100644 index 4ae708c60..000000000 --- a/test/infinicore/op/matmul.py +++ /dev/null @@ -1,232 +0,0 @@ -import torch -import infinicore -import sys -import os - -# Framework path -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) - -from framework import ( - TestConfig, - TestRunner, - TestCase, - create_infinicore_tensor, - compare_results, - get_args, - get_test_devices, - profile_operation, - to_torch_dtype, - InfiniDeviceNames, - torch_device_map, -) - -# ============================================================================== -# Test Setup -# ============================================================================== - -# Test cases -_TEST_CASES = [ - # (a_shape, b_shape, result_shape, a_stride, b_stride, c_stride) - TestCase((2, 3), (3, 4), (2, 4), None, None, None), - TestCase((128, 256), (256, 64), (128, 64), None, None, None), - TestCase((2, 4, 2048), (2, 2048, 2048), (2, 4, 2048), None, None, None), - TestCase((1, 2048), (2048, 2048), (1, 2048), (4096, 1), (4096, 1), (4096, 1)), - TestCase((6, 2048), (2048, 2560), (6, 2560), (2048, 1), (1, 2048), (2560, 1)), - TestCase((4, 8 * 6, 64), (4, 64, 6), (4, 8 * 6, 6), None, None, None), -] - -# Data types - now using infinicore native types -_TENSOR_DTYPES = [infinicore.float16, infinicore.bfloat16, infinicore.float32] - -# Tolerance -_TOLERANCE_MAP = { - infinicore.float16: {"atol": 0, "rtol": 1e-2}, - infinicore.float32: {"atol": 0, "rtol": 1e-3}, - infinicore.bfloat16: {"atol": 0, "rtol": 5e-2}, -} - -# ============================================================================== -# Test Method -# ============================================================================== - - -def test_matmul(device, test_case, dtype, config): - """ - Test matmul operation - - Args: - device: device enum - test_case: test case - dtype: infinicore data type - config: test config - """ - a_shape, b_shape, result_shape, a_stride, b_stride, c_stride = test_case.args - - print( - f"Testing Matmul on {InfiniDeviceNames[device]} with " - f"a_shape:{a_shape}, b_shape:{b_shape}, result_shape:{result_shape}, " - f"a_stride:{a_stride}, b_stride:{b_stride}, c_stride:{c_stride}, " - f"dtype:{dtype}" - ) - - # Create PyTorch tensors - device_str = torch_device_map[device] - torch_dtype = to_torch_dtype(dtype) - - torch_a = torch.rand(a_shape, dtype=torch_dtype, device=device_str) - torch_b = torch.rand(b_shape, dtype=torch_dtype, device=device_str) - - # Calculate PyTorch reference result - def torch_matmul(): - return torch.matmul(torch_a, torch_b) - - torch_result = torch_matmul() - - # Create infinicore tensors - infini_a = create_infinicore_tensor(torch_a, device_str) - infini_b = create_infinicore_tensor(torch_b, device_str) - - # Out-of-place matmul - def infini_matmul(): - return infinicore.matmul(infini_a, infini_b) - - infini_result = infini_matmul() - - # Validate results using common method - is_valid = compare_results(infini_result, torch_result, dtype, config, device_str) - assert is_valid, "Matmul test failed" - - # Performance test - if config.bench: - profile_operation( - "PyTorch", - torch_matmul, - device_str, - config.num_prerun, - config.num_iterations, - ) - profile_operation( - "Infinicore", - infini_matmul, - device_str, - config.num_prerun, - config.num_iterations, - ) - - -def test_matmul_inplace(device, test_case, dtype, config): - """ - Test in-place matmul operation - - Args: - device: device enum - test_case: test case - dtype: infinicore data type - config: test config - """ - a_shape, b_shape, result_shape, a_stride, b_stride, c_stride = test_case.args - - print( - f"Testing In-place Matmul on {InfiniDeviceNames[device]} with " - f"a_shape:{a_shape}, b_shape:{b_shape}, result_shape:{result_shape}, " - f"dtype:{dtype}" - ) - - device_str = torch_device_map[device] - torch_dtype = to_torch_dtype(dtype) - - # Create PyTorch tensors - torch_a = torch.rand(a_shape, dtype=torch_dtype, device=device_str) - torch_b = torch.rand(b_shape, dtype=torch_dtype, device=device_str) - - # Create pre-allocated result tensor - torch_preallocated = torch.zeros(result_shape, dtype=torch_dtype, device=device_str) - - # Calculate PyTorch reference result using in-place operation - def torch_matmul_inplace(): - torch.matmul(torch_a, torch_b, out=torch_preallocated) - - # Execute in-place operation - torch_matmul_inplace() - - # Create infinicore tensors - infini_a = create_infinicore_tensor(torch_a, device_str) - infini_b = create_infinicore_tensor(torch_b, device_str) - infini_c = infinicore.empty( - result_shape, dtype=dtype, device=infinicore.device(device_str, 0) - ) - - # Test in-place matmul - def infini_matmul_inplace(): - infinicore.matmul(infini_a, infini_b, out=infini_c) - - # Execute in-place operation - infini_matmul_inplace() - - # Validate results using common method - is_valid = compare_results(infini_c, torch_preallocated, dtype, config, device_str) - assert is_valid, "In-place matmul test failed" - - # Performance test - if config.bench: - profile_operation( - "PyTorch In-place", - torch_matmul_inplace, - device_str, - config.num_prerun, - config.num_iterations, - ) - profile_operation( - "Infinicore In-place", - infini_matmul_inplace, - device_str, - config.num_prerun, - config.num_iterations, - ) - - -# ============================================================================== -# Main Execution Function -# ============================================================================== - - -def main(): - args = get_args() - - # Create test configuration - config = TestConfig( - tensor_dtypes=_TENSOR_DTYPES, - tolerance_map=_TOLERANCE_MAP, - debug=args.debug, - bench=args.bench, - num_prerun=args.num_prerun, - num_iterations=args.num_iterations, - ) - - # Create test runner - runner = TestRunner(_TEST_CASES, config) - - # Get test devices - devices = get_test_devices(args) - - print("Starting matmul tests...") - - all_passed = True - - # Run out-of-place tests - print("\n--- Testing Out-of-place Matmul ---") - out_of_place_passed = runner.run_tests(devices, test_matmul) - all_passed = all_passed and out_of_place_passed - - # Run in-place tests - print("\n--- Testing In-place Matmul ---") - in_place_passed = runner.run_tests(devices, test_matmul_inplace) - all_passed = all_passed and in_place_passed - - runner.print_summary() - - sys.exit(0 if all_passed else 1) - - -if __name__ == "__main__": - main() diff --git a/test/infinicore/ops.py b/test/infinicore/ops.py new file mode 100644 index 000000000..f36f8b8c3 --- /dev/null +++ b/test/infinicore/ops.py @@ -0,0 +1,260 @@ +import os +import sys +import subprocess +import argparse +from pathlib import Path + + +def find_ops_directory(start_dir=None): + """ + Find the ops directory by searching from start_dir upwards. + """ + if start_dir is None: + start_dir = Path(__file__).parent + + ops_dir = start_dir / "ops" + if ops_dir.exists() and (ops_dir / "rms_norm.py").exists(): + return ops_dir + + +def run_all_op_tests(ops_dir=None, verbose=False, specific_ops=None, extra_args=None): + """ + Run all operator test scripts in the ops directory. + + Args: + ops_dir (str, optional): Path to the ops directory. If None, uses the current directory. + verbose (bool): Whether to print detailed output. + specific_ops (list, optional): List of specific operator names to test (e.g., ['add', 'matmul']). + extra_args (list, optional): Extra command line arguments to pass to test scripts. + + Returns: + dict: Results dictionary with test names as keys and (success, return_code, output) as values. + """ + if ops_dir is None: + ops_dir = find_ops_directory() + else: + ops_dir = Path(ops_dir) + + if not ops_dir.exists(): + print(f"Error: Ops directory '{ops_dir}' does not exist.") + return {} + + print(f"Looking for test files in: {ops_dir}") + + # Find all Python test files (looking for actual operator test files) + test_files = list(ops_dir.glob("*.py")) + + # Filter out this script itself and non-operator test files + current_script = Path(__file__).name + test_files = [f for f in test_files if f.name != current_script] + + # Further filter to include only files that look like operator tests + # (they typically import infinicore and BaseOperatorTest) + operator_test_files = [] + for test_file in test_files: + try: + with open(test_file, "r", encoding="utf-8") as f: + content = f.read() + if "infinicore" in content and "BaseOperatorTest" in content: + operator_test_files.append(test_file) + elif verbose: + print(f" Skipping {test_file.name}: not an operator test file") + except Exception as e: + if verbose: + print(f" Could not read {test_file.name}: {e}") + continue + + if specific_ops: + # Filter for specific operators (case insensitive) + filtered_files = [] + for test_file in operator_test_files: + test_name = test_file.stem.lower() + if any(op.lower() in test_name for op in specific_ops): + filtered_files.append(test_file) + elif verbose: + print(f" Filtered out {test_file.name}: not in specific_ops list") + operator_test_files = filtered_files + + if not operator_test_files: + print(f"No operator test files found in {ops_dir}") + print(f"Available Python files: {[f.name for f in test_files]}") + print(f"Current directory: {Path.cwd()}") + return {} + + print(f"Found {len(operator_test_files)} operator test files:") + for test_file in operator_test_files: + print(f" - {test_file.name}") + + results = {} + + for test_file in operator_test_files: + test_name = test_file.stem + + try: + # Run the test script + cmd = [sys.executable, str(test_file)] + + # Add extra arguments if provided + if extra_args: + cmd.extend(extra_args) + + if verbose: + print(f"Command: {' '.join(cmd)}") + print(f"Working directory: {ops_dir}") + + # Always capture output to display it + result = subprocess.run(cmd, cwd=ops_dir, capture_output=True, text=True) + + success = result.returncode == 0 + results[test_name] = ( + success, + result.returncode, + result.stdout, + result.stderr, + ) + + # Print the output from the test script + if result.stdout: + print(result.stdout) + + if result.stderr: + print("STDERR:") + print(result.stderr) + + if success: + print(f"✅ {test_name}: PASSED (return code: {result.returncode})") + else: + print(f"❌ {test_name}: FAILED (return code: {result.returncode})") + + except Exception as e: + print(f"❌ {test_name}: ERROR - {str(e)}") + results[test_name] = (False, -1, "", str(e)) + + return results + + +def print_summary(results): + """Print a summary of test results.""" + print(f"\n{'='*80}") + print("TEST SUMMARY") + print(f"{'='*80}") + + if not results: + print("No tests were run.") + return + + passed = sum(1 for success, _, _, _ in results.values() if success) + total = len(results) + + print(f"Total tests: {total}") + print(f"Passed: {passed}") + print(f"Failed: {total - passed}") + + if total > 0: + print(f"Success rate: {passed/total*100:.1f}%") + + if passed == total: + print("\n🎉 All tests passed!") + else: + print("\nFailed tests:") + for test_name, (success, returncode, stdout, stderr) in results.items(): + if not success: + print(f" - {test_name} (return code: {returncode})") + # Print brief error info for failed tests + if stderr: + error_lines = stderr.strip().split("\n") + if error_lines: + print(f" Error: {error_lines[0]}") + + +def main(): + """Main entry point with command line argument parsing.""" + parser = argparse.ArgumentParser( + description="Run all operator tests in the ops directory", add_help=False + ) + + # Our script's specific arguments + parser.add_argument( + "--ops-dir", type=str, help="Path to the ops directory (default: auto-detect)" + ) + parser.add_argument( + "-v", + "--verbose", + action="store_true", + help="Print detailed command information for each test", + ) + parser.add_argument( + "--ops", nargs="+", help="Run specific operators only (e.g., --ops add matmul)" + ) + parser.add_argument( + "--list", + action="store_true", + help="List all available test files without running them", + ) + parser.add_argument( + "-h", "--help", action="store_true", help="Show this help message and exit" + ) + + # Parse known args first, leave the rest for the test scripts + args, unknown_args = parser.parse_known_args() + + if args.help: + parser.print_help() + print("\nExtra arguments that will be passed to test scripts:") + print(" --nvidia, --cpu, --bench, --debug, etc.") + return + + # Auto-detect ops directory if not provided + if args.ops_dir is None: + ops_dir = find_ops_directory() + else: + ops_dir = Path(args.ops_dir) + + if args.list: + # Just list available test files + test_files = list(ops_dir.glob("*.py")) + current_script = Path(__file__).name + test_files = [f for f in test_files if f.name != current_script] + + operator_test_files = [] + for test_file in test_files: + try: + with open(test_file, "r", encoding="utf-8") as f: + content = f.read() + if "infinicore" in content and "BaseOperatorTest" in content: + operator_test_files.append(test_file) + except: + continue + + if operator_test_files: + print(f"Available operator test files in {ops_dir}:") + for test_file in operator_test_files: + print(f" - {test_file.name}") + else: + print(f"No operator test files found in {ops_dir}") + print(f"Available Python files: {[f.name for f in test_files]}") + return + + # Show what extra arguments will be passed + if unknown_args: + print(f"Passing extra arguments to test scripts: {unknown_args}") + + # Run all tests + results = run_all_op_tests( + ops_dir=ops_dir, + verbose=args.verbose, + specific_ops=args.ops, + extra_args=unknown_args, + ) + + print_summary(results) + + # Exit with appropriate code + if results and all(success for success, _, _, _ in results.values()): + sys.exit(0) + else: + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/test/infinicore/ops/add.py b/test/infinicore/ops/add.py new file mode 100644 index 000000000..a5bb13443 --- /dev/null +++ b/test/infinicore/ops/add.py @@ -0,0 +1,106 @@ +import sys +import os + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +import torch +import infinicore +from framework.base import BaseOperatorTest, TensorSpec, TestCase +from framework.runner import GenericTestRunner + +# ============================================================================== +# Operator-specific configuration +# ============================================================================== + +# Test cases format: (operation_mode, shape, a_strides, b_strides, c_strides) +_TEST_CASES_DATA = [ + (TestCase.BOTH, (13, 4), None, None, None), + (TestCase.BOTH, (13, 4), (10, 1), (10, 1), (10, 1)), + (TestCase.BOTH, (13, 4), (0, 1), None, None), + (TestCase.BOTH, (13, 4, 4), None, None, None), + (TestCase.BOTH, (13, 4, 4), (20, 4, 1), (20, 4, 1), (20, 4, 1)), + (TestCase.BOTH, (13, 4, 4), (4, 0, 1), (0, 4, 1), None), + (TestCase.BOTH, (16, 5632), None, None, None), + (TestCase.BOTH, (16, 5632), (13312, 1), (13312, 1), (13312, 1)), +] + + +def parse_add_test_case(data): + """ + Parse add test case data according to format: + (operation_mode, shape, a_strides, b_strides, c_strides) + """ + operation_mode = data[0] + shape = data[1] + a_strides = data[2] if len(data) > 2 else None + b_strides = data[3] if len(data) > 3 else None + c_strides = data[4] if len(data) > 4 else None + + # Create input specifications + inputs = [] + + # Input tensor a + if a_strides is not None: + inputs.append(TensorSpec.from_strided_tensor(shape, a_strides)) + else: + inputs.append(TensorSpec.from_tensor(shape)) + + # Input tensor b (same shape as a) + if b_strides is not None: + inputs.append(TensorSpec.from_strided_tensor(shape, b_strides)) + else: + inputs.append(TensorSpec.from_tensor(shape)) + + # Output tensor + if c_strides is not None: + output = TensorSpec.from_strided_tensor(shape, c_strides) + else: + output = TensorSpec.from_tensor(shape) + + return TestCase(operation_mode, inputs, output) + + +# Parse test cases +_TEST_CASES = [parse_add_test_case(data) for data in _TEST_CASES_DATA] + +# Data types +_TENSOR_DTYPES = [infinicore.float16, infinicore.bfloat16, infinicore.float32] + +# Tolerance +_TOLERANCE_MAP = { + infinicore.float16: {"atol": 0, "rtol": 1e-2}, + infinicore.float32: {"atol": 0, "rtol": 1e-3}, + infinicore.bfloat16: {"atol": 0, "rtol": 5e-2}, +} + + +class OpTest(BaseOperatorTest): + """Add test with simplified test case parsing""" + + def __init__(self): + super().__init__("Add") + + def get_test_cases(self): + return _TEST_CASES + + def get_tensor_dtypes(self): + return _TENSOR_DTYPES + + def get_tolerance_map(self): + return _TOLERANCE_MAP + + def torch_operator(self, a, b, out=None, **kwargs): + return torch.add(a, b, out=out) + + def infinicore_operator(self, a, b, out=None, **kwargs): + return infinicore.add(a, b, out=out) + + +def main(): + """Main entry point""" + runner = GenericTestRunner(OpTest) + runner.run_and_exit() + + +if __name__ == "__main__": + main() diff --git a/test/infinicore/ops/attention_temp.py b/test/infinicore/ops/attention_temp.py new file mode 100644 index 000000000..59af180bc --- /dev/null +++ b/test/infinicore/ops/attention_temp.py @@ -0,0 +1,268 @@ +""" +This is for framework validation +""" + +import sys +import os + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +import torch +import infinicore +from framework.base import BaseOperatorTest, TensorSpec, TestCase +from framework.runner import GenericTestRunner + +# ============================================================================== +# Operator-specific configuration +# ============================================================================== + +# Test cases format: (operation_mode, n_q_head, n_kv_head, seq_len, head_dim, pos, +# k_cache_buf_len, v_cache_buf_len, q_strides, k_strides, v_strides, +# k_cache_strides, v_cache_strides) +_TEST_CASES_DATA = [ + # Prefill stage + ( + TestCase.OUT_OF_PLACE, + 32, + 4, + 5, + 64, + 0, + 2048, + 2048, + [64, 2560, 1], + [64, 2560, 1], + [64, 2560, 1], + [64, 11264, 1], + [64, 11264, 1], + ), + # Decode stage + ( + TestCase.OUT_OF_PLACE, + 32, + 4, + 1, + 64, + 3, + 2048, + 2048, + [64, 2560, 1], + [64, 2560, 1], + [64, 2560, 1], + [64, 11264, 1], + [64, 11264, 1], + ), + # Small test case + (TestCase.OUT_OF_PLACE, 8, 4, 2, 16, 1, 8, 8, None, None, None, None, None), + # Another prefill case + ( + TestCase.OUT_OF_PLACE, + 28, + 28, + 15, + 128, + 0, + 2048, + 2048, + [128, 10752, 1], + [128, 10752, 1], + [128, 10752, 1], + [128, 3584, 1], + [128, 3584, 1], + ), +] + +# Epsilon constant for causal softmax +_EPSILON = 1e-5 + + +def causal_softmax(x): + """Apply causal mask and softmax to attention scores""" + input_dtype = x.dtype + # Create causal mask + mask = torch.tril(torch.ones_like(x), diagonal=-1).flip(dims=[-2, -1]) + # Apply mask: set masked positions to -inf + masked = torch.where(mask == 1, -torch.inf, x.to(torch.float32)) + # Apply softmax and convert back to original dtype + return torch.nn.functional.softmax(masked, dim=-1).to(input_dtype) + + +def torch_attention(q, k, v, k_cache, v_cache, pos): + """PyTorch reference implementation of attention""" + input_dtype = q.dtype + + n_q_head = q.shape[0] + n_kv_head = k.shape[0] + + # Concatenate key and value caches + k_cache = k_cache[:, :pos, :] # (n_kv_head, pos, head_dim) + v_cache = v_cache[:, :pos, :] # (n_kv_head, pos, head_dim) + k = torch.cat([k_cache, k], dim=1) # (n_kv_head, total_seq_len, head_dim) + v = torch.cat([v_cache, v], dim=1) # (n_kv_head, total_seq_len, head_dim) + + total_seq_len = k.shape[1] + head_dim = v.shape[-1] + + # Handle grouped query attention (GQA) + if n_q_head != n_kv_head: + q = q.reshape( + n_kv_head, -1, head_dim + ) # (n_kv_head, n_group * seq_len, head_dim) + + # Scaled dot-product attention + attn_scores = ( + torch.einsum("hqd,hkd->hqk", q.to(torch.float32), k.to(torch.float32)) + .to(input_dtype) + .reshape(n_q_head, -1, total_seq_len) + ) # (n_q_head, seq_len, total_seq_len) + + # Scale by sqrt(head_dim) + attn_scores = attn_scores / (head_dim**0.5) + + # Apply causal softmax + attn_weights = causal_softmax(attn_scores).reshape( + n_kv_head, -1, total_seq_len + ) # (n_kv_head, seq_len, total_seq_len) + + # Weighted sum of values + attn_output = ( + torch.einsum( + "hqk,hkd->hqd", attn_weights.to(torch.float32), v.to(torch.float32) + ) + .to(input_dtype) + .reshape(n_q_head, -1, head_dim) + .permute(1, 0, 2) + ) # (seq_len, n_q_head, head_dim) + + return attn_output + + +def parse_attention_test_case(data): + """ + Parse attention test case data according to format: + (operation_mode, n_q_head, n_kv_head, seq_len, head_dim, pos, + k_cache_buf_len, v_cache_buf_len, q_strides, k_strides, v_strides, + k_cache_strides, v_cache_strides) + """ + operation_mode = data[0] + n_q_head, n_kv_head, seq_len, head_dim, pos = ( + data[1], + data[2], + data[3], + data[4], + data[5], + ) + k_cache_buf_len, v_cache_buf_len = data[6], data[7] + q_strides = data[8] if len(data) > 8 else None + k_strides = data[9] if len(data) > 9 else None + v_strides = data[10] if len(data) > 10 else None + k_cache_strides = data[11] if len(data) > 11 else None + v_cache_strides = data[12] if len(data) > 12 else None + + # Create input specifications + inputs = [] + + # Query tensor: (n_q_head, seq_len, head_dim) + if q_strides is not None: + inputs.append( + TensorSpec.from_strided_tensor((n_q_head, seq_len, head_dim), q_strides) + ) + else: + inputs.append(TensorSpec.from_tensor((n_q_head, seq_len, head_dim))) + + # Key tensor: (n_kv_head, seq_len, head_dim) + if k_strides is not None: + inputs.append( + TensorSpec.from_strided_tensor((n_kv_head, seq_len, head_dim), k_strides) + ) + else: + inputs.append(TensorSpec.from_tensor((n_kv_head, seq_len, head_dim))) + + # Value tensor: (n_kv_head, seq_len, head_dim) + if v_strides is not None: + inputs.append( + TensorSpec.from_strided_tensor((n_kv_head, seq_len, head_dim), v_strides) + ) + else: + inputs.append(TensorSpec.from_tensor((n_kv_head, seq_len, head_dim))) + + # Key cache: (n_kv_head, k_cache_buf_len, head_dim) + if k_cache_strides is not None: + inputs.append( + TensorSpec.from_strided_tensor( + (n_kv_head, k_cache_buf_len, head_dim), k_cache_strides + ) + ) + else: + inputs.append(TensorSpec.from_tensor((n_kv_head, k_cache_buf_len, head_dim))) + + # Value cache: (n_kv_head, v_cache_buf_len, head_dim) + if v_cache_strides is not None: + inputs.append( + TensorSpec.from_strided_tensor( + (n_kv_head, v_cache_buf_len, head_dim), v_cache_strides + ) + ) + else: + inputs.append(TensorSpec.from_tensor((n_kv_head, v_cache_buf_len, head_dim))) + + # Position (scalar) + inputs.append(TensorSpec.from_scalar(pos)) + + # Output tensor: (seq_len, n_q_head, head_dim) + output_shape = (seq_len, n_q_head, head_dim) + output = TensorSpec.from_tensor(output_shape) + + return TestCase(operation_mode, inputs, output) + + +# Parse test cases +_TEST_CASES = [parse_attention_test_case(data) for data in _TEST_CASES_DATA] + +# Data types +_TENSOR_DTYPES = [infinicore.float16, infinicore.bfloat16, infinicore.float32] + +# Tolerance +_TOLERANCE_MAP = { + infinicore.float16: {"atol": 1e-4, "rtol": 1e-2}, + infinicore.float32: {"atol": 1e-5, "rtol": 1e-3}, + infinicore.bfloat16: {"atol": 1e-3, "rtol": 5e-2}, +} + + +class OpTest(BaseOperatorTest): + """Attention test with simplified test case parsing""" + + def __init__(self): + super().__init__("Attention") + + def get_test_cases(self): + return _TEST_CASES + + def get_tensor_dtypes(self): + return _TENSOR_DTYPES + + def get_tolerance_map(self): + return _TOLERANCE_MAP + + def torch_operator(self, q, k, v, k_cache, v_cache, pos, out=None, **kwargs): + result = torch_attention(q, k, v, k_cache, v_cache, pos) + + if out is not None: + out.set_(result) + return out + else: + return result + + def infinicore_operator(self, q, k, v, k_cache, v_cache, pos, out=None, **kwargs): + return infinicore.attention(q, k, v, k_cache, v_cache, pos, out=out) + + +def main(): + """Main entry point""" + runner = GenericTestRunner(OpTest) + runner.run_and_exit() + + +if __name__ == "__main__": + main() diff --git a/test/infinicore/ops/matmul.py b/test/infinicore/ops/matmul.py new file mode 100644 index 000000000..e243edd95 --- /dev/null +++ b/test/infinicore/ops/matmul.py @@ -0,0 +1,122 @@ +import sys +import os + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +import torch +import infinicore +from framework.base import BaseOperatorTest, TensorSpec, TestCase +from framework.runner import GenericTestRunner + +# ============================================================================== +# Operator-specific configuration +# ============================================================================== + +# Test cases format: (operation_mode, nbatch, m, n, k, a_strides, b_strides, c_strides) +# If nbatch is None: a_shape=(m, k), b_shape=(k, n), c_shape=(m, n) +# If nbatch is provided: a_shape=(nbatch, m, k), b_shape=(nbatch, k, n), c_shape=(nbatch, m, n) +_TEST_CASES_DATA = [ + # Basic 2D matmul + (TestCase.BOTH, None, 2, 4, 3, None, None, None), + (TestCase.BOTH, None, 128, 64, 256, None, None, None), + # Batched matmul + (TestCase.BOTH, 2, 4, 2048, 2048, None, None, None), + (TestCase.BOTH, 4, 48, 6, 64, None, None, None), + # Strided tensors + (TestCase.BOTH, None, 1, 2048, 2048, (4096, 1), (4096, 1), (4096, 1)), + (TestCase.BOTH, None, 6, 2560, 2048, (2048, 1), (1, 2048), (2560, 1)), + # Mixed cases + (TestCase.BOTH, 8, 16, 32, 16, None, None, None), +] + + +def parse_matmul_test_case(data): + """ + Parse matmul test case data according to format: + (operation_mode, nbatch, m, n, k, a_strides, b_strides, c_strides) + """ + operation_mode = data[0] + nbatch = data[1] + m, n, k = data[2], data[3], data[4] + a_strides = data[5] if len(data) > 5 else None + b_strides = data[6] if len(data) > 6 else None + c_strides = data[7] if len(data) > 7 else None + + # Determine shapes based on batch dimension + if nbatch is None: + a_shape = (m, k) + b_shape = (k, n) + c_shape = (m, n) + else: + a_shape = (nbatch, m, k) + b_shape = (nbatch, k, n) + c_shape = (nbatch, m, n) + + # Create input specifications + inputs = [] + + # Tensor a + if a_strides is not None: + inputs.append(TensorSpec.from_strided_tensor(a_shape, a_strides)) + else: + inputs.append(TensorSpec.from_tensor(a_shape)) + + # Tensor b + if b_strides is not None: + inputs.append(TensorSpec.from_strided_tensor(b_shape, b_strides)) + else: + inputs.append(TensorSpec.from_tensor(b_shape)) + + # Output tensor + if c_strides is not None: + output = TensorSpec.from_strided_tensor(c_shape, c_strides) + else: + output = TensorSpec.from_tensor(c_shape) + + return TestCase(operation_mode, inputs, output) + + +# Parse test cases +_TEST_CASES = [parse_matmul_test_case(data) for data in _TEST_CASES_DATA] + +# Data types +_TENSOR_DTYPES = [infinicore.float16, infinicore.bfloat16, infinicore.float32] + +# Tolerance +_TOLERANCE_MAP = { + infinicore.float16: {"atol": 0, "rtol": 1e-2}, + infinicore.float32: {"atol": 0, "rtol": 1e-3}, + infinicore.bfloat16: {"atol": 0, "rtol": 5e-2}, +} + + +class OpTest(BaseOperatorTest): + """Matmul test with simplified test case parsing""" + + def __init__(self): + super().__init__("Matmul") + + def get_test_cases(self): + return _TEST_CASES + + def get_tensor_dtypes(self): + return _TENSOR_DTYPES + + def get_tolerance_map(self): + return _TOLERANCE_MAP + + def torch_operator(self, a, b, out=None, **kwargs): + return torch.matmul(a, b, out=out) + + def infinicore_operator(self, a, b, out=None, **kwargs): + return infinicore.matmul(a, b, out=out) + + +def main(): + """Main entry point""" + runner = GenericTestRunner(OpTest) + runner.run_and_exit() + + +if __name__ == "__main__": + main() diff --git a/test/infinicore/ops/rms_norm.py b/test/infinicore/ops/rms_norm.py new file mode 100644 index 000000000..40249a745 --- /dev/null +++ b/test/infinicore/ops/rms_norm.py @@ -0,0 +1,132 @@ +import sys +import os + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +import torch +import infinicore +from framework.base import BaseOperatorTest, TensorSpec, TestCase +from framework.runner import GenericTestRunner + +# ============================================================================== +# Operator-specific configuration +# ============================================================================== + +# Test cases format: (operation_mode, y_shape, x_shape, w_shape, y_strides, x_strides) +_TEST_CASES_DATA = [ + (TestCase.BOTH, (1, 4), (1, 4), (4,), None, None), + (TestCase.BOTH, (2, 4), (2, 4), (4,), None, None), + (TestCase.BOTH, (2, 2, 4), (2, 2, 4), (4,), None, None), + (TestCase.BOTH, (2, 2, 4), (2, 2, 4), (4,), (12, 8, 1), (12, 8, 1)), + (TestCase.BOTH, (16, 2048), (16, 2048), (2048,), None, None), + (TestCase.BOTH, (16, 2048), (16, 2048), (2048,), (4096, 1), (4096, 1)), +] + + +def parse_rms_norm_test_case(data): + """ + Parse RMSNorm test case data according to format: + (operation_mode, y_shape, x_shape, w_shape, y_strides, x_strides) + """ + operation_mode = data[0] + y_shape = data[1] # Output shape + x_shape = data[2] # Input shape + w_shape = data[3] # Weight shape (1D) + y_strides = data[4] if len(data) > 4 else None + x_strides = data[5] if len(data) > 5 else None + + # Create input specifications + inputs = [] + + # Input tensor x + if x_strides is not None: + inputs.append(TensorSpec.from_strided_tensor(x_shape, x_strides)) + else: + inputs.append(TensorSpec.from_tensor(x_shape)) + + # Weight tensor (1D, always contiguous) + inputs.append(TensorSpec.from_tensor(w_shape)) + + # Output tensor + if y_strides is not None: + output = TensorSpec.from_strided_tensor(y_shape, y_strides) + else: + output = TensorSpec.from_tensor(y_shape) + + return TestCase(operation_mode, inputs, output) + + +# Parse test cases +_TEST_CASES = [parse_rms_norm_test_case(data) for data in _TEST_CASES_DATA] + +# Data types for individual tensors +_INPUT_DTYPES = [infinicore.float16, infinicore.bfloat16] +_WEIGHT_DTYPES = [infinicore.float16, infinicore.bfloat16, infinicore.float32] + +# Generate all dtype combinations +_DTYPE_COMBINATIONS = [] +for input_dtype in _INPUT_DTYPES: + for weight_dtype in _WEIGHT_DTYPES: + _DTYPE_COMBINATIONS.append( + { + "input_0": input_dtype, # x tensor + "input_1": weight_dtype, # weight tensor + "output": input_dtype, # output tensor (same as input) + } + ) + +# Base data types +_TENSOR_DTYPES = [infinicore.float16, infinicore.bfloat16] + +# Tolerance +_TOLERANCE_MAP = { + infinicore.float16: {"atol": 2e-3, "rtol": 2e-3}, + infinicore.bfloat16: {"atol": 1e-2, "rtol": 1e-2}, +} + +# EPSILON constant for RMSNorm +_EPSILON = 1e-5 + + +class OpTest(BaseOperatorTest): + """RMSNorm test with simplified test case parsing""" + + def __init__(self): + super().__init__("RMS_Norm") + + def get_test_cases(self): + return _TEST_CASES + + def get_tensor_dtypes(self): + return _TENSOR_DTYPES + + def get_tolerance_map(self): + return _TOLERANCE_MAP + + def get_dtype_combinations(self): + return _DTYPE_COMBINATIONS + + def torch_operator(self, x, weight, out=None, **kwargs): + input_dtype = x.dtype + hidden_states = x.to(torch.float32) + scale = hidden_states.pow(2).mean(-1, keepdim=True).add_(_EPSILON).rsqrt_() + result = (hidden_states * scale * weight).to(input_dtype) + + if out is not None: + out.set_(result) + return out + else: + return result + + def infinicore_operator(self, x, weight, out=None, **kwargs): + return infinicore.rms_norm(x, weight, _EPSILON, out=out) + + +def main(): + """Main entry point""" + runner = GenericTestRunner(OpTest) + runner.run_and_exit() + + +if __name__ == "__main__": + main() diff --git a/xmake.lua b/xmake.lua index 10c47aa69..ac3fad2ca 100644 --- a/xmake.lua +++ b/xmake.lua @@ -345,7 +345,7 @@ target("_infinicore") add_files("src/infinicore/context/*.cc") add_files("src/infinicore/context/*/*.cc") add_files("src/infinicore/tensor/*.cc") - add_files("src/infinicore/op/*/*.cc") + add_files("src/infinicore/ops/*/*.cc") add_files("src/infinicore/pybind11/**.cc") set_installdir("python/infinicore")