
Commit 73fb6a8

Merge pull request #1131 from InfiniTensor/issue/1126
Issue/1126 - quickgelu, gelutanh
2 parents 70712f2 + 2ab93e5 commit 73fb6a8

74 files changed

Lines changed: 1894 additions & 269 deletions

Some content is hidden: large commits have some content hidden by default, so only a subset of the 74 changed files appears below.

include/infinicore/adaptor/aten_adaptor.hpp

Lines changed: 2 additions & 2 deletions
@@ -6,9 +6,9 @@
 #include <ATen/ATen.h>
 
 #if defined(ENABLE_NVIDIA_API) || defined(ENABLE_QY_API)
-#include <c10/cuda/CUDAStream.h>
-#include <c10/cuda/CUDAGuard.h>
 #include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <c10/cuda/CUDAStream.h>
 #endif
 
 namespace infinicore::adaptor {

include/infinicore/nn/embedding.hpp

Lines changed: 5 additions & 5 deletions
@@ -1,7 +1,7 @@
 #pragma once
 
-#include "module.hpp"
 #include "../ops.hpp"
+#include "module.hpp"
 #include <optional>
 
 namespace infinicore::nn {
@@ -78,10 +78,10 @@ class Embedding : public Module {
    INFINICORE_NN_PARAMETER(weight);
 
 private:
-    size_t num_embeddings_; // Vocabulary size
-    size_t embedding_dim_; // Embedding dimension
-    std::optional<int64_t> padding_idx_; // Optional padding index
-    DataType dtype_; // Data type for embedding weights
+    size_t num_embeddings_;              // Vocabulary size
+    size_t embedding_dim_;               // Embedding dimension
+    std::optional<int64_t> padding_idx_; // Optional padding index
+    DataType dtype_;                     // Data type for embedding weights
 };
 
 } // namespace infinicore::nn

include/infinicore/nn/module.hpp

Lines changed: 1 addition & 1 deletion
@@ -3,10 +3,10 @@
 #include "../tensor.hpp"
 #include "parameter.hpp"
 
+#include <spdlog/spdlog.h>
 #include <type_traits>
 #include <unordered_map>
 #include <vector>
-#include <spdlog/spdlog.h>
 
 namespace infinicore::nn {
 class Module {

include/infinicore/ops.hpp

Lines changed: 7 additions & 0 deletions
@@ -14,27 +14,34 @@
 #include "ops/binary_cross_entropy_with_logits.hpp"
 #include "ops/causal_softmax.hpp"
 #include "ops/cdist.hpp"
+#include "ops/conv2d.hpp"
 #include "ops/cross_entropy.hpp"
 #include "ops/embedding.hpp"
 #include "ops/flash_attention.hpp"
 #include "ops/fmin.hpp"
 #include "ops/fmod.hpp"
+#include "ops/gelu.hpp"
+#include "ops/gelutanh.hpp"
 #include "ops/hardswish.hpp"
 #include "ops/hardtanh.hpp"
 #include "ops/kv_caching.hpp"
 #include "ops/layer_norm.hpp"
+#include "ops/linear.hpp"
 #include "ops/matmul.hpp"
 #include "ops/ones.hpp"
 #include "ops/paged_attention.hpp"
 #include "ops/paged_attention_prefill.hpp"
 #include "ops/paged_caching.hpp"
 #include "ops/per_tensor_dequant_i8.hpp"
 #include "ops/per_tensor_quant_i8.hpp"
+#include "ops/quickgelu.hpp"
 #include "ops/random_sample.hpp"
 #include "ops/rearrange.hpp"
 #include "ops/reciprocal.hpp"
+#include "ops/relu.hpp"
 #include "ops/rms_norm.hpp"
 #include "ops/rope.hpp"
 #include "ops/silu.hpp"
 #include "ops/silu_and_mul.hpp"
+#include "ops/softmax.hpp"
 #include "ops/swiglu.hpp"

include/infinicore/ops/conv2d.hpp

Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
+#pragma once
+
+#include "../device.hpp"
+#include "common/op.hpp"
+
+#include <cstddef>
+#include <vector>
+
+namespace infinicore::op {
+class Conv2d {
+public:
+    using schema = void (*)(Tensor, Tensor, Tensor, Tensor,
+                            const size_t *, const size_t *, const size_t *, size_t);
+    static void execute(Tensor output,
+                        Tensor input,
+                        Tensor weight,
+                        Tensor bias,
+                        const size_t *pads,
+                        const size_t *strides,
+                        const size_t *dilations,
+                        size_t n);
+    static common::OpDispatcher<schema> &dispatcher();
+};
+
+Tensor conv2d(Tensor input,
+              Tensor weight,
+              Tensor bias,
+              const std::vector<size_t> &pads,
+              const std::vector<size_t> &strides,
+              const std::vector<size_t> &dilations);
+void conv2d_(Tensor output,
+             Tensor input,
+             Tensor weight,
+             Tensor bias,
+             const std::vector<size_t> &pads,
+             const std::vector<size_t> &strides,
+             const std::vector<size_t> &dilations);
+} // namespace infinicore::op
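
The raw-pointer schema is the dispatch signature, while the std::vector overloads are the public convenience wrappers. A minimal calling sketch follows; the include path, helper name, and tensor setup are assumptions, and only op::conv2d itself is declared in this commit:

#include <infinicore/ops.hpp> // assumed umbrella header path
#include <cstddef>
#include <vector>

using namespace infinicore;

// Hypothetical helper: input/weight/bias are assumed pre-built Tensors.
Tensor run_conv(Tensor input, Tensor weight, Tensor bias) {
    const std::vector<size_t> pads      = {1, 1}; // pad H and W by 1
    const std::vector<size_t> strides   = {1, 1};
    const std::vector<size_t> dilations = {1, 1};
    // Out-of-place overload: allocates and returns the output tensor.
    return op::conv2d(input, weight, bias, pads, strides, dilations);
}

Presumably the vector overload forwards pads.data(), strides.data(), dilations.data() and the spatial rank as n into Conv2d::execute; that mapping is an inference from the two signatures, not something shown in the diff.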

include/infinicore/ops/gelu.hpp

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
+#pragma once
+
+#include "../device.hpp"
+#include "common/op.hpp"
+
+namespace infinicore::op {
+class Gelu {
+public:
+    using schema = void (*)(Tensor, Tensor);
+    static void execute(Tensor output, Tensor input);
+    static common::OpDispatcher<schema> &dispatcher();
+};
+
+Tensor gelu(Tensor input);
+void gelu_(Tensor output, Tensor input);
+} // namespace infinicore::op
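
The header declares only the dispatch plumbing for a unary op (one input tensor, one output tensor). For reference, a scalar sketch of the math, assuming the op implements the standard exact GELU; the device kernels are not shown in this excerpt:

#include <cmath>

// Assumed standard definition, not code from this commit:
//   gelu(x) = x * Phi(x) = 0.5 * x * (1 + erf(x / sqrt(2)))
inline float gelu_ref(float x) {
    return 0.5f * x * (1.0f + std::erf(x / std::sqrt(2.0f)));
}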
include/infinicore/ops/gelutanh.hpp

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
+#pragma once
+
+#include "../device.hpp"
+#include "common/op.hpp"
+
+namespace infinicore::op {
+class GeluTanh {
+public:
+    using schema = void (*)(Tensor, Tensor);
+    static void execute(Tensor output, Tensor input);
+    static common::OpDispatcher<schema> &dispatcher();
+};
+
+Tensor gelu_tanh(Tensor input);
+void gelu_tanh_(Tensor output, Tensor input);
+} // namespace infinicore::op
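
A scalar sketch of the math, assuming gelu_tanh implements the standard tanh approximation of GELU; the kernels are not shown in this excerpt:

#include <cmath>

// Assumed standard tanh approximation, not code from this commit:
//   gelu_tanh(x) = 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
inline float gelu_tanh_ref(float x) {
    const float k = std::sqrt(2.0f / 3.14159265358979f);
    return 0.5f * x * (1.0f + std::tanh(k * (x + 0.044715f * x * x * x)));
}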

include/infinicore/ops/mha_kvcache.hpp

Lines changed: 8 additions & 8 deletions
@@ -22,14 +22,14 @@ namespace infinicore::op {
 
 INFINICORE_GRAPH_OP_CLASS(
     MhaKVCache,
-    Tensor, // out
-    const Tensor &, // q
-    const Tensor &, // k_cache
-    const Tensor &, // v_cache
-    const Tensor &, // seqlens_k
-    const Tensor &, // block_table
-    std::optional<Tensor>, // alibi_slopes
-    float); // scale
+    Tensor,                // out
+    const Tensor &,        // q
+    const Tensor &,        // k_cache
+    const Tensor &,        // v_cache
+    const Tensor &,        // seqlens_k
+    const Tensor &,        // block_table
+    std::optional<Tensor>, // alibi_slopes
+    float);                // scale
 
 Tensor mha_kvcache(const Tensor &q,
                    const Tensor &k_cache,
include/infinicore/ops/quickgelu.hpp

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
+#pragma once
+
+#include "../device.hpp"
+#include "common/op.hpp"
+
+namespace infinicore::op {
+class QuickGelu {
+public:
+    using schema = void (*)(Tensor, Tensor);
+    static void execute(Tensor output, Tensor input);
+    static common::OpDispatcher<schema> &dispatcher();
+};
+
+Tensor quick_gelu(Tensor input);
+void quick_gelu_(Tensor output, Tensor input);
+} // namespace infinicore::op
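
A scalar sketch of the math, assuming quick_gelu implements the usual sigmoid approximation popularized by CLIP; the kernels are not shown in this excerpt:

#include <cmath>

// Assumed definition, not code from this commit:
//   quick_gelu(x) = x * sigmoid(1.702 * x)
inline float quick_gelu_ref(float x) {
    return x / (1.0f + std::exp(-1.702f * x));
}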

include/infinicore/ops/relu.hpp

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
+#pragma once
+
+#include "../device.hpp"
+#include "common/op.hpp"
+
+namespace infinicore::op {
+class Relu {
+public:
+    using schema = void (*)(Tensor, Tensor);
+    static void execute(Tensor output, Tensor input);
+    static common::OpDispatcher<schema> &dispatcher();
+};
+
+Tensor relu(Tensor input);
+void relu_(Tensor output, Tensor input);
+} // namespace infinicore::op
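
Each new activation ships the same pair of wrappers: an out-of-place form that returns a fresh tensor, and an in-place form (trailing underscore) that writes into a caller-provided output. A usage sketch, assuming x and y are pre-built tensors of matching shape and dtype and that the umbrella header path below is correct:

#include <infinicore/ops.hpp> // assumed umbrella header path

using namespace infinicore;

Tensor relu_example(Tensor x, Tensor y) {
    op::relu_(y, x);    // in-place form: writes the result into y
    return op::relu(x); // out-of-place: allocates and returns the result
}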
