Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions include/infinicore/adaptor/aten_adaptor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
#include <ATen/ATen.h>

#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_QY_API)
#include <c10/cuda/CUDAStream.h>
#include <c10/cuda/CUDAGuard.h>
#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h>
#include <c10/cuda/CUDAStream.h>
#endif

namespace infinicore::adaptor {
Expand Down
10 changes: 5 additions & 5 deletions include/infinicore/nn/embedding.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#pragma once

#include "module.hpp"
#include "../ops.hpp"
#include "module.hpp"
#include <optional>

namespace infinicore::nn {
Expand Down Expand Up @@ -78,10 +78,10 @@ class Embedding : public Module {
INFINICORE_NN_PARAMETER(weight);

private:
size_t num_embeddings_; // Vocabulary size
size_t embedding_dim_; // Embedding dimension
std::optional<int64_t> padding_idx_; // Optional padding index
DataType dtype_; // Data type for embedding weights
size_t num_embeddings_; // Vocabulary size
size_t embedding_dim_; // Embedding dimension
std::optional<int64_t> padding_idx_; // Optional padding index
DataType dtype_; // Data type for embedding weights
};

} // namespace infinicore::nn
2 changes: 1 addition & 1 deletion include/infinicore/nn/module.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
#include "../tensor.hpp"
#include "parameter.hpp"

#include <spdlog/spdlog.h>
#include <type_traits>
#include <unordered_map>
#include <vector>
#include <spdlog/spdlog.h>

namespace infinicore::nn {
class Module {
Expand Down
7 changes: 7 additions & 0 deletions include/infinicore/ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,34 @@
#include "ops/binary_cross_entropy_with_logits.hpp"
#include "ops/causal_softmax.hpp"
#include "ops/cdist.hpp"
#include "ops/conv2d.hpp"
#include "ops/cross_entropy.hpp"
#include "ops/embedding.hpp"
#include "ops/flash_attention.hpp"
#include "ops/fmin.hpp"
#include "ops/fmod.hpp"
#include "ops/gelu.hpp"
#include "ops/gelutanh.hpp"
#include "ops/hardswish.hpp"
#include "ops/hardtanh.hpp"
#include "ops/kv_caching.hpp"
#include "ops/layer_norm.hpp"
#include "ops/linear.hpp"
#include "ops/matmul.hpp"
#include "ops/ones.hpp"
#include "ops/paged_attention.hpp"
#include "ops/paged_attention_prefill.hpp"
#include "ops/paged_caching.hpp"
#include "ops/per_tensor_dequant_i8.hpp"
#include "ops/per_tensor_quant_i8.hpp"
#include "ops/quickgelu.hpp"
#include "ops/random_sample.hpp"
#include "ops/rearrange.hpp"
#include "ops/reciprocal.hpp"
#include "ops/relu.hpp"
#include "ops/rms_norm.hpp"
#include "ops/rope.hpp"
#include "ops/silu.hpp"
#include "ops/silu_and_mul.hpp"
#include "ops/softmax.hpp"
#include "ops/swiglu.hpp"
38 changes: 38 additions & 0 deletions include/infinicore/ops/conv2d.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

#include <cstddef>
#include <vector>

namespace infinicore::op {
// Dispatch entry for the 2D convolution operator.
// Backends register an implementation with the signature `schema`:
// (output, input, weight, bias, pads, strides, dilations, n), where the
// three size_t arrays each hold `n` entries — presumably one per spatial
// dimension; confirm against the registered backend implementations.
class Conv2d {
public:
using schema = void (*)(Tensor, Tensor, Tensor, Tensor,
const size_t *, const size_t *, const size_t *, size_t);
// Looks up the implementation for the current device via dispatcher()
// and runs it, writing the result into `output`.
static void execute(Tensor output,
Tensor input,
Tensor weight,
Tensor bias,
const size_t *pads,
const size_t *strides,
const size_t *dilations,
size_t n);
// Registry used to look up the backend implementation to invoke.
static common::OpDispatcher<schema> &dispatcher();
};

// Out-of-place variant: returns the convolution result as a new tensor.
Tensor conv2d(Tensor input,
Tensor weight,
Tensor bias,
const std::vector<size_t> &pads,
const std::vector<size_t> &strides,
const std::vector<size_t> &dilations);
// Out-parameter variant: writes the convolution result into `output`.
void conv2d_(Tensor output,
Tensor input,
Tensor weight,
Tensor bias,
const std::vector<size_t> &pads,
const std::vector<size_t> &strides,
const std::vector<size_t> &dilations);
} // namespace infinicore::op
16 changes: 16 additions & 0 deletions include/infinicore/ops/gelu.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
// Dispatch entry for the GELU activation operator.
class Gelu {
public:
// Backend signature: (output, input).
using schema = void (*)(Tensor, Tensor);
// Runs the registered backend implementation, writing into `output`.
static void execute(Tensor output, Tensor input);
// Registry used to look up the backend implementation to invoke.
static common::OpDispatcher<schema> &dispatcher();
};

// Out-of-place variant: returns the result as a new tensor.
Tensor gelu(Tensor input);
// Out-parameter variant: writes the result into `output`.
void gelu_(Tensor output, Tensor input);
} // namespace infinicore::op
16 changes: 16 additions & 0 deletions include/infinicore/ops/gelutanh.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
// Dispatch entry for the tanh-approximation GELU activation operator
// (distinct from the exact-form Gelu op declared in gelu.hpp).
class GeluTanh {
public:
// Backend signature: (output, input).
using schema = void (*)(Tensor, Tensor);
// Runs the registered backend implementation, writing into `output`.
static void execute(Tensor output, Tensor input);
// Registry used to look up the backend implementation to invoke.
static common::OpDispatcher<schema> &dispatcher();
};

// Out-of-place variant: returns the result as a new tensor.
Tensor gelu_tanh(Tensor input);
// Out-parameter variant: writes the result into `output`.
void gelu_tanh_(Tensor output, Tensor input);
} // namespace infinicore::op
16 changes: 8 additions & 8 deletions include/infinicore/ops/mha_kvcache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ namespace infinicore::op {

INFINICORE_GRAPH_OP_CLASS(
MhaKVCache,
Tensor, // out
const Tensor &, // q
const Tensor &, // k_cache
const Tensor &, // v_cache
const Tensor &, // seqlens_k
const Tensor &, // block_table
std::optional<Tensor>, // alibi_slopes
float); // scale
Tensor, // out
const Tensor &, // q
const Tensor &, // k_cache
const Tensor &, // v_cache
const Tensor &, // seqlens_k
const Tensor &, // block_table
std::optional<Tensor>, // alibi_slopes
float); // scale

Tensor mha_kvcache(const Tensor &q,
const Tensor &k_cache,
Expand Down
16 changes: 16 additions & 0 deletions include/infinicore/ops/quickgelu.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
// Dispatch entry for the QuickGELU activation operator (a fast
// sigmoid-based GELU approximation — confirm the exact formula against
// the backend kernels).
class QuickGelu {
public:
// Backend signature: (output, input).
using schema = void (*)(Tensor, Tensor);
// Runs the registered backend implementation, writing into `output`.
static void execute(Tensor output, Tensor input);
// Registry used to look up the backend implementation to invoke.
static common::OpDispatcher<schema> &dispatcher();
};

// Out-of-place variant: returns the result as a new tensor.
Tensor quick_gelu(Tensor input);
// Out-parameter variant: writes the result into `output`.
void quick_gelu_(Tensor output, Tensor input);
} // namespace infinicore::op
16 changes: 16 additions & 0 deletions include/infinicore/ops/relu.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
// Dispatch entry for the ReLU activation operator.
class Relu {
public:
// Backend signature: (output, input).
using schema = void (*)(Tensor, Tensor);
// Runs the registered backend implementation, writing into `output`.
static void execute(Tensor output, Tensor input);
// Registry used to look up the backend implementation to invoke.
static common::OpDispatcher<schema> &dispatcher();
};

// Out-of-place variant: returns the result as a new tensor.
Tensor relu(Tensor input);
// Out-parameter variant: writes the result into `output`.
void relu_(Tensor output, Tensor input);
} // namespace infinicore::op
16 changes: 16 additions & 0 deletions include/infinicore/ops/softmax.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
// Dispatch entry for the softmax operator.
class Softmax {
public:
// Backend signature: (output, input, axis).
using schema = void (*)(Tensor, Tensor, int);
// Runs the registered backend implementation along `axis`, writing
// into `output`.
static void execute(Tensor output, Tensor input, int axis);
// Registry used to look up the backend implementation to invoke.
static common::OpDispatcher<schema> &dispatcher();
};

// Out-of-place variant: returns the result as a new tensor.
// `axis` defaults to -1 — presumably the last dimension; confirm how
// backends interpret negative axes.
Tensor softmax(Tensor input, int axis = -1);
// Out-parameter variant: writes the result into `output`.
void softmax_(Tensor output, Tensor input, int axis = -1);
} // namespace infinicore::op
2 changes: 1 addition & 1 deletion include/infinicore/quantization/compressed_tensors.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class CompressedTensors : public BaseQuantization {
// information and support multiple quantization schemes.
public:
explicit CompressedTensors(const nlohmann::json &quant_config)
: BaseQuantization(quant_config) {};
: BaseQuantization(quant_config){};

infinicore::quantization::QuantScheme
get_quant_scheme() const override {
Expand Down
2 changes: 1 addition & 1 deletion include/infinicore/quantization/none_quantizaiton.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class NoneQuantization : public BaseQuantization {
// information and support multiple quantization schemes.
public:
explicit NoneQuantization(const nlohmann::json &quant_config)
: BaseQuantization(quant_config) {};
: BaseQuantization(quant_config){};

infinicore::quantization::QuantScheme
get_quant_scheme() const override {
Expand Down
2 changes: 2 additions & 0 deletions include/infiniop.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
#include "infiniop/ops/fmin.h"
#include "infiniop/ops/fmod.h"
#include "infiniop/ops/gelu.h"
#include "infiniop/ops/gelutanh.h"
#include "infiniop/ops/gemm.h"
#include "infiniop/ops/hardswish.h"
#include "infiniop/ops/hardtanh.h"
Expand Down Expand Up @@ -84,6 +85,7 @@
#include "infiniop/ops/pixel_shuffle.h"
#include "infiniop/ops/quant/per_channel_quant_int8.h"
#include "infiniop/ops/quant/per_tensor_quant_int8.h"
#include "infiniop/ops/quickgelu.h"
#include "infiniop/ops/random_sample.h"
#include "infiniop/ops/rearrange.h"
#include "infiniop/ops/reciprocal.h"
Expand Down
20 changes: 10 additions & 10 deletions include/infiniop/ops/add.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,20 @@
typedef struct InfiniopDescriptor *infiniopAddDescriptor_t;

__INFINI_C __export infiniStatus_t infiniopCreateAddDescriptor(infiniopHandle_t handle,
infiniopAddDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t c,
infiniopTensorDescriptor_t a,
infiniopTensorDescriptor_t b);
infiniopAddDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t c,
infiniopTensorDescriptor_t a,
infiniopTensorDescriptor_t b);

__INFINI_C __export infiniStatus_t infiniopGetAddWorkspaceSize(infiniopAddDescriptor_t desc, size_t *size);

__INFINI_C __export infiniStatus_t infiniopAdd(infiniopAddDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *c,
const void *a,
const void *b,
void *stream);
void *workspace,
size_t workspace_size,
void *c,
const void *a,
const void *b,
void *stream);

__INFINI_C __export infiniStatus_t infiniopDestroyAddDescriptor(infiniopAddDescriptor_t desc);

Expand Down
16 changes: 8 additions & 8 deletions include/infiniop/ops/add_rms_norm.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@ __INFINI_C __export infiniStatus_t infiniopCreateAddRMSNormDescriptor(
__INFINI_C __export infiniStatus_t infiniopGetAddRMSNormWorkspaceSize(infiniopAddRMSNormDescriptor_t desc, size_t *size);

__INFINI_C __export infiniStatus_t infiniopAddRMSNorm(infiniopAddRMSNormDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *y,
void *residual_out,
const void *a,
const void *b,
const void *weight,
void *stream);
void *workspace,
size_t workspace_size,
void *y,
void *residual_out,
const void *a,
const void *b,
const void *weight,
void *stream);

__INFINI_C __export infiniStatus_t infiniopDestroyAddRMSNormDescriptor(infiniopAddRMSNormDescriptor_t desc);

Expand Down
34 changes: 17 additions & 17 deletions include/infiniop/ops/attention.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,27 @@
typedef struct InfiniopDescriptor *infiniopAttentionDescriptor_t;

__INFINI_C __export infiniStatus_t infiniopCreateAttentionDescriptor(infiniopHandle_t handle,
infiniopAttentionDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t out_desc,
infiniopTensorDescriptor_t q_desc,
infiniopTensorDescriptor_t k_desc,
infiniopTensorDescriptor_t v_desc,
infiniopTensorDescriptor_t k_cache_desc,
infiniopTensorDescriptor_t v_cache_desc,
size_t pos);
infiniopAttentionDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t out_desc,
infiniopTensorDescriptor_t q_desc,
infiniopTensorDescriptor_t k_desc,
infiniopTensorDescriptor_t v_desc,
infiniopTensorDescriptor_t k_cache_desc,
infiniopTensorDescriptor_t v_cache_desc,
size_t pos);

__INFINI_C __export infiniStatus_t infiniopGetAttentionWorkspaceSize(infiniopAttentionDescriptor_t desc, size_t *size);

__INFINI_C __export infiniStatus_t infiniopAttention(infiniopAttentionDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *out,
const void *q,
const void *k,
const void *v,
void *k_cache,
void *v_cache,
void *stream);
void *workspace,
size_t workspace_size,
void *out,
const void *q,
const void *k,
const void *v,
void *k_cache,
void *v_cache,
void *stream);

__INFINI_C __export infiniStatus_t infiniopDestroyAttentionDescriptor(infiniopAttentionDescriptor_t desc);
#endif
24 changes: 12 additions & 12 deletions include/infiniop/ops/clip.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,22 @@
typedef struct InfiniopDescriptor *infiniopClipDescriptor_t;

__INFINI_C __export infiniStatus_t infiniopCreateClipDescriptor(infiniopHandle_t handle,
infiniopClipDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x,
infiniopTensorDescriptor_t min_val,
infiniopTensorDescriptor_t max_val);
infiniopClipDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x,
infiniopTensorDescriptor_t min_val,
infiniopTensorDescriptor_t max_val);

__INFINI_C __export infiniStatus_t infiniopGetClipWorkspaceSize(infiniopClipDescriptor_t desc, size_t *size);

__INFINI_C __export infiniStatus_t infiniopClip(infiniopClipDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *y,
const void *x,
const void *min_val,
const void *max_val,
void *stream);
void *workspace,
size_t workspace_size,
void *y,
const void *x,
const void *min_val,
const void *max_val,
void *stream);

__INFINI_C __export infiniStatus_t infiniopDestroyClipDescriptor(infiniopClipDescriptor_t desc);

Expand Down
Loading
Loading