-
Notifications
You must be signed in to change notification settings - Fork 66
Expand file tree
/
Copy path: minicpm_sala_decoderLayer.cpp
More file actions
61 lines (50 loc) · 3.12 KB
/
minicpm_sala_decoderLayer.cpp
File metadata and controls
61 lines (50 loc) · 3.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#include "minicpm_sala_decoderLayer.hpp"
#include "infinicore/ops.hpp"
#include <stdexcept>
#include <string>
#include <vector>
namespace infinilm::models::minicpm_sala {
/// @brief Builds one SALA decoder layer: two RMS norms, an MLP, and a
///        per-layer attention ("mixer") implementation chosen from the
///        model config's "mixer_types" list.
/// @param model_config shared model configuration (dtype, sizes, mixer list)
/// @param layer_idx    index of this layer; selects its entry in "mixer_types"
/// @param device       device all submodules are created on
/// @throws std::runtime_error if "mixer_types" has no entry for layer_idx,
///         or if the entry names an unsupported mixer type
MiniCPMSALADecoderLayer::MiniCPMSALADecoderLayer(std::shared_ptr<infinilm::config::ModelConfig> model_config,
                                                 size_t layer_idx,
                                                 const infinicore::Device &device)
    : layer_idx_(layer_idx) {
    const auto &dtype{model_config->get_dtype()};
    const auto hidden_size = model_config->get<size_t>("hidden_size");
    const auto rms_norm_eps = model_config->get<double>("rms_norm_eps");

    // Norms and MLP are identical regardless of which mixer this layer uses.
    INFINICORE_NN_MODULE_INIT(input_layernorm, hidden_size, rms_norm_eps, dtype, device);
    INFINICORE_NN_MODULE_INIT(post_attention_layernorm, hidden_size, rms_norm_eps, dtype, device);
    INFINICORE_NN_MODULE_INIT(mlp, model_config, device);

    const auto mixer_types = model_config->get<std::vector<std::string>>("mixer_types");
    // Guard the per-layer lookup: a short "mixer_types" list would otherwise
    // be out-of-bounds operator[] (undefined behavior).
    if (layer_idx >= mixer_types.size()) {
        throw std::runtime_error("infinilm::models::minicpm_sala::MiniCPMSALADecoderLayer: mixer_types has "
                                 + std::to_string(mixer_types.size()) + " entries but layer index is "
                                 + std::to_string(layer_idx));
    }
    const std::string &mixer_type = mixer_types[layer_idx];

    if ("minicpm4" == mixer_type) {
        // InfLLMv2 sparse attention variant.
        self_attn_ = std::make_shared<MiniCPMSALAAttention>(this->register_module<InfLLMv2Attention>("self_attn", model_config, layer_idx, device));
    } else if ("lightning" == mixer_type || "lightning_attn" == mixer_type || "lightning-attn" == mixer_type) {
        // Lightning (linear) attention variant; several config spellings accepted.
        self_attn_ = std::make_shared<MiniCPMSALAAttention>(this->register_module<LightningAttention>("self_attn", model_config, layer_idx, device));
    } else {
        throw std::runtime_error("infinilm::models::minicpm_sala::MiniCPMSALADecoderLayer: unsupported mixer_type '" + mixer_type + "' for layer " + std::to_string(layer_idx));
    }
}
/// @brief Runs the layer on the fused-residual path: norm -> attention ->
///        norm -> MLP, with the residual carried alongside the activations.
/// @param positions     position tensor handed to the attention mixer
/// @param hidden_states activations; updated through each sublayer
/// @param residual      residual stream; updated by the in-place norms
/// @return the (hidden_states, residual) pair after this layer
/// NOTE(review): forward_inplace presumably merges the residual into the
/// normalized activations in place — confirm against the norm module.
std::tuple<infinicore::Tensor, infinicore::Tensor> MiniCPMSALADecoderLayer::forward(const infinicore::Tensor &positions,
                                                                                    infinicore::Tensor &hidden_states,
                                                                                    infinicore::Tensor &residual) {
    // Attention sublayer.
    input_layernorm_->forward_inplace(hidden_states, residual);
    // Dispatch to whichever mixer variant this layer was constructed with.
    hidden_states = std::visit(
        [&](auto &mixer) { return mixer->forward(positions, hidden_states); },
        *self_attn_);

    // MLP sublayer.
    post_attention_layernorm_->forward_inplace(hidden_states, residual);
    hidden_states = mlp_->forward(hidden_states);

    return {hidden_states, residual};
}
/// @brief Runs the layer as a standard pre-norm transformer block:
///        two stages of norm -> sublayer -> explicit residual add.
/// @param positions     position tensor handed to the attention mixer
/// @param hidden_states input activations for this layer
/// @return the layer output tensor
infinicore::Tensor MiniCPMSALADecoderLayer::forward(const infinicore::Tensor &positions,
                                                    infinicore::Tensor &hidden_states) {
    // Stage 1: attention sublayer with residual connection.
    auto skip = hidden_states;
    hidden_states = input_layernorm_->forward(hidden_states);
    // Dispatch to whichever mixer variant this layer was constructed with.
    hidden_states = std::visit(
        [&](auto &mixer) { return mixer->forward(positions, hidden_states); },
        *self_attn_);
    hidden_states = infinicore::op::add(skip, hidden_states);

    // Stage 2: MLP sublayer with residual connection.
    skip = hidden_states;
    hidden_states = post_attention_layernorm_->forward(hidden_states);
    hidden_states = mlp_->forward(hidden_states);
    hidden_states = infinicore::op::add(skip, hidden_states);

    return hidden_states;
}
} // namespace infinilm::models::minicpm_sala