Skip to content

Commit 3d1e26f

Browse files
Susskind115wooway777
authored and committed
Add LayerNorm and vision ops wrappers
1 parent 6effbfb commit 3d1e26f

File tree

19 files changed

+696
-0
lines changed

19 files changed

+696
-0
lines changed

include/infinicore/ops.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,27 +14,34 @@
1414
#include "ops/binary_cross_entropy_with_logits.hpp"
1515
#include "ops/causal_softmax.hpp"
1616
#include "ops/cdist.hpp"
17+
#include "ops/conv2d.hpp"
1718
#include "ops/cross_entropy.hpp"
1819
#include "ops/embedding.hpp"
1920
#include "ops/flash_attention.hpp"
2021
#include "ops/fmin.hpp"
2122
#include "ops/fmod.hpp"
23+
#include "ops/gelu.hpp"
24+
#include "ops/gelutanh.hpp"
2225
#include "ops/hardswish.hpp"
2326
#include "ops/hardtanh.hpp"
2427
#include "ops/kv_caching.hpp"
2528
#include "ops/layer_norm.hpp"
29+
#include "ops/linear.hpp"
2630
#include "ops/matmul.hpp"
2731
#include "ops/ones.hpp"
2832
#include "ops/paged_attention.hpp"
2933
#include "ops/paged_attention_prefill.hpp"
3034
#include "ops/paged_caching.hpp"
3135
#include "ops/per_tensor_dequant_i8.hpp"
3236
#include "ops/per_tensor_quant_i8.hpp"
37+
#include "ops/quickgelu.hpp"
3338
#include "ops/random_sample.hpp"
3439
#include "ops/rearrange.hpp"
3540
#include "ops/reciprocal.hpp"
41+
#include "ops/relu.hpp"
3642
#include "ops/rms_norm.hpp"
3743
#include "ops/rope.hpp"
3844
#include "ops/silu.hpp"
3945
#include "ops/silu_and_mul.hpp"
46+
#include "ops/softmax.hpp"
4047
#include "ops/swiglu.hpp"

include/infinicore/ops/conv2d.hpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#pragma once
2+
3+
#include "../device.hpp"
4+
#include "common/op.hpp"
5+
6+
#include <cstddef>
7+
#include <vector>
8+
9+
namespace infinicore::op {
// Dispatcher-backed 2-D convolution op.
class Conv2d {
public:
    // Kernel signature: (output, input, weight, bias, pads, strides,
    // dilations, n), where pads/strides/dilations each point at n entries —
    // one per spatial dimension.
    using schema = void (*)(Tensor, Tensor, Tensor, Tensor,
                            const size_t *, const size_t *, const size_t *, size_t);
    // Looks up the kernel registered for output's device and invokes it.
    static void execute(Tensor output,
                        Tensor input,
                        Tensor weight,
                        Tensor bias,
                        const size_t *pads,
                        const size_t *strides,
                        const size_t *dilations,
                        size_t n);
    // Per-device registry of Conv2d kernels.
    static common::OpDispatcher<schema> &dispatcher();
};

// Out-of-place convolution: allocates and returns the output tensor.
Tensor conv2d(Tensor input,
              Tensor weight,
              Tensor bias,
              const std::vector<size_t> &pads,
              const std::vector<size_t> &strides,
              const std::vector<size_t> &dilations);
// In-place variant: writes the convolution result into `output`.
// pads/strides/dilations must all have the same length.
void conv2d_(Tensor output,
             Tensor input,
             Tensor weight,
             Tensor bias,
             const std::vector<size_t> &pads,
             const std::vector<size_t> &strides,
             const std::vector<size_t> &dilations);
} // namespace infinicore::op

include/infinicore/ops/gelu.hpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#pragma once
2+
3+
#include "../device.hpp"
4+
#include "common/op.hpp"
5+
6+
namespace infinicore::op {
// Dispatcher-backed GELU (Gaussian Error Linear Unit) activation op.
class Gelu {
public:
    // Kernel signature: (output, input).
    using schema = void (*)(Tensor, Tensor);
    // Looks up the kernel registered for output's device and invokes it;
    // throws std::runtime_error if no kernel is registered for that device.
    static void execute(Tensor output, Tensor input);
    // Per-device registry of Gelu kernels.
    static common::OpDispatcher<schema> &dispatcher();
};

// Out-of-place GELU: allocates a tensor matching input's shape/dtype/device
// and fills it with the activation result.
Tensor gelu(Tensor input);
// In-place variant: writes the activation of `input` into `output`.
void gelu_(Tensor output, Tensor input);
} // namespace infinicore::op
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#pragma once
2+
3+
#include "../device.hpp"
4+
#include "common/op.hpp"
5+
6+
namespace infinicore::op {
// Dispatcher-backed GELU activation using the tanh approximation
// (per the class name; the exact formula lives in the registered kernels).
class GeluTanh {
public:
    // Kernel signature: (output, input).
    using schema = void (*)(Tensor, Tensor);
    // Looks up the kernel registered for output's device and invokes it.
    static void execute(Tensor output, Tensor input);
    // Per-device registry of GeluTanh kernels.
    static common::OpDispatcher<schema> &dispatcher();
};

// Out-of-place variant: presumably allocates the result tensor
// (see the op's .cc for the allocation policy).
Tensor gelu_tanh(Tensor input);
// In-place variant: writes the activation of `input` into `output`.
void gelu_tanh_(Tensor output, Tensor input);
} // namespace infinicore::op
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#pragma once
2+
3+
#include "../device.hpp"
4+
#include "common/op.hpp"
5+
6+
namespace infinicore::op {
// Dispatcher-backed QuickGELU activation op (a sigmoid-based GELU
// approximation, per the name — the exact formula lives in the kernels).
class QuickGelu {
public:
    // Kernel signature: (output, input).
    using schema = void (*)(Tensor, Tensor);
    // Looks up the kernel registered for output's device and invokes it.
    static void execute(Tensor output, Tensor input);
    // Per-device registry of QuickGelu kernels.
    static common::OpDispatcher<schema> &dispatcher();
};

// Out-of-place variant: presumably allocates the result tensor
// (see the op's .cc for the allocation policy).
Tensor quick_gelu(Tensor input);
// In-place variant: writes the activation of `input` into `output`.
void quick_gelu_(Tensor output, Tensor input);
} // namespace infinicore::op

include/infinicore/ops/relu.hpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#pragma once
2+
3+
#include "../device.hpp"
4+
#include "common/op.hpp"
5+
6+
namespace infinicore::op {
// Dispatcher-backed ReLU (rectified linear unit) activation op.
class Relu {
public:
    // Kernel signature: (output, input).
    using schema = void (*)(Tensor, Tensor);
    // Looks up the kernel registered for output's device and invokes it.
    static void execute(Tensor output, Tensor input);
    // Per-device registry of Relu kernels.
    static common::OpDispatcher<schema> &dispatcher();
};

// Out-of-place variant: presumably allocates the result tensor
// (see the op's .cc for the allocation policy).
Tensor relu(Tensor input);
// In-place variant: writes the activation of `input` into `output`.
void relu_(Tensor output, Tensor input);
} // namespace infinicore::op

include/infinicore/ops/softmax.hpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#pragma once
2+
3+
#include "../device.hpp"
4+
#include "common/op.hpp"
5+
6+
namespace infinicore::op {
// Dispatcher-backed softmax op.
class Softmax {
public:
    // Kernel signature: (output, input, axis).
    using schema = void (*)(Tensor, Tensor, int);
    // Looks up the kernel registered for output's device and invokes it.
    // `axis` selects the dimension to normalize over; -1 presumably means
    // the last dimension — confirm in the registered kernels.
    static void execute(Tensor output, Tensor input, int axis);
    // Per-device registry of Softmax kernels.
    static common::OpDispatcher<schema> &dispatcher();
};

// Out-of-place softmax along `axis` (defaults to -1).
Tensor softmax(Tensor input, int axis = -1);
// In-place variant: writes softmax(input) into `output`.
void softmax_(Tensor output, Tensor input, int axis = -1);
} // namespace infinicore::op
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#include "infinicore/ops/conv2d.hpp"
2+
3+
#include "../../utils.hpp"
4+
5+
#include <stdexcept>
6+
7+
namespace infinicore::op {
8+
9+
common::OpDispatcher<Conv2d::schema> &Conv2d::dispatcher() {
10+
static common::OpDispatcher<Conv2d::schema> dispatcher_;
11+
return dispatcher_;
12+
};
13+
14+
void Conv2d::execute(Tensor output,
15+
Tensor input,
16+
Tensor weight,
17+
Tensor bias,
18+
const size_t *pads,
19+
const size_t *strides,
20+
const size_t *dilations,
21+
size_t n) {
22+
INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, input, weight, bias);
23+
infinicore::context::setDevice(output->device());
24+
auto device_type = output->device().getType();
25+
auto func = dispatcher().lookup(device_type);
26+
27+
if (func == nullptr) {
28+
throw std::runtime_error("No Conv2d implementation found for device type: " + std::to_string(static_cast<int>(device_type)));
29+
}
30+
31+
func(output, input, weight, bias, pads, strides, dilations, n);
32+
}
33+
34+
Tensor conv2d(Tensor input,
35+
Tensor weight,
36+
Tensor bias,
37+
const std::vector<size_t> &pads,
38+
const std::vector<size_t> &strides,
39+
const std::vector<size_t> &dilations) {
40+
// Output shape should be pre-computed by caller; allocate a conservative placeholder.
41+
// This helper is rarely used in performance-critical paths.
42+
Shape shape = input->shape();
43+
auto output = Tensor::empty(shape, input->dtype(), input->device());
44+
conv2d_(output, input, weight, bias, pads, strides, dilations);
45+
return output;
46+
}
47+
48+
void conv2d_(Tensor output,
49+
Tensor input,
50+
Tensor weight,
51+
Tensor bias,
52+
const std::vector<size_t> &pads,
53+
const std::vector<size_t> &strides,
54+
const std::vector<size_t> &dilations) {
55+
if (pads.size() != strides.size() || pads.size() != dilations.size()) {
56+
throw std::runtime_error("conv2d_: pads/strides/dilations must have the same size");
57+
}
58+
Conv2d::execute(output,
59+
input,
60+
weight,
61+
bias,
62+
pads.data(),
63+
strides.data(),
64+
dilations.data(),
65+
pads.size());
66+
}
67+
} // namespace infinicore::op
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#include "../../utils.hpp"
2+
#include "infinicore/common/hash.hpp"
3+
#include "infinicore/ops/common/cache.hpp"
4+
#include "infinicore/ops/conv2d.hpp"
5+
#include <infiniop.h>
6+
7+
namespace infinicore::op::conv2d_impl::infiniop {

// Thread-local cache of infiniop convolution descriptors, keyed by a hash of
// the participating tensors and the conv hyper-parameters. Descriptor
// creation is presumably expensive, so repeated launches with the same
// configuration reuse one. The eviction callback destroys the underlying
// infiniop descriptor.
thread_local common::OpCache<size_t, infiniopConvDescriptor_t> caches(
    100, // capacity
    [](infiniopConvDescriptor_t &desc) {
        if (desc != nullptr) {
            INFINICORE_CHECK_ERROR(infiniopDestroyConvDescriptor(desc));
            desc = nullptr;
        }
    });

// infiniop-backed Conv2d kernel; registered with Conv2d::dispatcher() below.
// `pads`/`strides`/`dilations` each point at `n` entries (one per spatial
// dimension). `bias` may be falsy, in which case a null bias is passed down.
void calculate(Tensor output,
               Tensor input,
               Tensor weight,
               Tensor bias,
               const size_t *pads,
               const size_t *strides,
               const size_t *dilations,
               size_t n) {
    // Fold every value that affects the descriptor into the cache key.
    // NOTE(review): the in-loop call appears to mutate `seed` in place (its
    // return value is ignored) — confirm hash_combine's overloads in
    // common/hash.hpp.
    size_t seed = hash_combine(output, input, weight, bias, n);
    for (size_t i = 0; i < n; ++i) {
        hash_combine(seed, pads[i], strides[i], dilations[i]);
    }

    auto device = context::getDevice();
    auto &cache = caches.getCache(device);

    // NOTE(review): entries are matched on the hash alone; a hash collision
    // would silently reuse a descriptor built for a different configuration.
    auto desc_opt = cache.get(seed);
    infiniopConvDescriptor_t desc = nullptr;

    if (!desc_opt) {
        // Cache miss: build a descriptor from the tensor descriptors and the
        // per-dimension conv parameters, then memoize it.
        INFINICORE_CHECK_ERROR(infiniopCreateConvDescriptor(
            context::getInfiniopHandle(device), &desc,
            output->desc(), input->desc(), weight->desc(),
            bias ? bias->desc() : nullptr,
            const_cast<size_t *>(pads), // infiniop takes non-const pointers
            const_cast<size_t *>(strides),
            const_cast<size_t *>(dilations),
            n));
        cache.put(seed, desc);
    } else {
        desc = *desc_opt;
    }

    // Workspace size is queried per launch and the buffer allocated fresh
    // each call (the descriptor is cached, the workspace is not).
    size_t workspace_size = 0;
    INFINICORE_CHECK_ERROR(infiniopGetConvWorkspaceSize(desc, &workspace_size));
    std::shared_ptr<Memory> workspace = context::allocateMemory(workspace_size);

    // Enqueue the convolution on the current stream.
    INFINICORE_CHECK_ERROR(infiniopConv(
        desc, workspace->data(), workspace_size,
        output->data(),
        input->data(),
        weight->data(),
        bias ? bias->data() : nullptr,
        context::getStream()));
}

// Self-registration at load time: installs this kernel for all device types
// (presumably without overriding existing entries, given the `false` flag —
// confirm registerAll's semantics).
static bool registered = []() {
    Conv2d::dispatcher().registerAll(&calculate, false);
    return true;
}();

} // namespace infinicore::op::conv2d_impl::infiniop

src/infinicore/ops/gelu/gelu.cc

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#include "infinicore/ops/gelu.hpp"
2+
3+
#include "../../utils.hpp"
4+
5+
#include <stdexcept>
6+
7+
namespace infinicore::op {
8+
9+
common::OpDispatcher<Gelu::schema> &Gelu::dispatcher() {
10+
static common::OpDispatcher<Gelu::schema> dispatcher_;
11+
return dispatcher_;
12+
};
13+
14+
void Gelu::execute(Tensor output, Tensor input) {
15+
INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, input);
16+
infinicore::context::setDevice(output->device());
17+
auto device_type = output->device().getType();
18+
auto func = dispatcher().lookup(device_type);
19+
20+
if (func == nullptr) {
21+
throw std::runtime_error("No Gelu implementation found for device type: " + std::to_string(static_cast<int>(device_type)));
22+
}
23+
24+
func(output, input);
25+
}
26+
27+
Tensor gelu(Tensor input) {
28+
Shape shape = input->shape();
29+
auto output = Tensor::empty(shape, input->dtype(), input->device());
30+
gelu_(output, input);
31+
return output;
32+
}
33+
34+
void gelu_(Tensor output, Tensor input) {
35+
Gelu::execute(output, input);
36+
}
37+
} // namespace infinicore::op

0 commit comments

Comments
 (0)