Skip to content

Commit 5e6340e

Browse files
committed
fmt
1 parent ae6eaa1 commit 5e6340e

3 files changed

Lines changed: 3 additions & 46 deletions

File tree

include/infinicore/quantization/gptq_qy.hpp

Lines changed: 2 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -83,9 +83,6 @@ inline std::vector<uint16_t> float_to_fp16_bits(const std::vector<float> &values
8383
}
8484
} // anonymous namespace
8585

86-
// ============================================================================
87-
// GPTQ_QY Class (最终修复版)
88-
// ============================================================================
8986
namespace infinicore::quantization {
9087

9188
class GPTQ_QY : public BaseQuantization {
@@ -123,9 +120,6 @@ class GPTQ_QY : public BaseQuantization {
123120
const int bits = weight_bits();
124121
const int values_per_int32 = 32 / bits;
125122

126-
// --------------------------------------------------------------------
127-
// 1. qweight 转换(保持不变)
128-
// --------------------------------------------------------------------
129123
{
130124
const auto &shape = original_qweight->shape();
131125
assert(shape.size() == 2);
@@ -148,9 +142,6 @@ class GPTQ_QY : public BaseQuantization {
148142
target_device);
149143
}
150144

151-
// --------------------------------------------------------------------
152-
// 2. qzeros 转换(保持 int32 -> fp32 -> fp16 逻辑)
153-
// --------------------------------------------------------------------
154145
{
155146
const auto &shape = original_qzeros->shape();
156147
assert(shape.size() == 2);
@@ -167,24 +158,15 @@ class GPTQ_QY : public BaseQuantization {
167158
target_device);
168159
}
169160

170-
// --------------------------------------------------------------------
171-
// 3. scales 转换(核心修复:如果输入已是 FP16,直接内存拷贝)
172-
// --------------------------------------------------------------------
173161
{
174162
auto scales_cpu = original_scales->to(Device::Type::CPU);
175163
size_t num_elements = scales_cpu->numel();
176164
const void *raw_data = scales_cpu->data();
177165

178166
std::vector<uint16_t> scales_fp16(num_elements);
179-
180-
// 关键:根据输入 dtype 决定处理方式
181167
if (scales_cpu->dtype() == DataType::F16) {
182-
// 输入已经是 FP16,直接 memcpy,不做任何转换!
183-
// spdlog::info("Scales is already FP16, performing direct memory copy");
184168
std::memcpy(scales_fp16.data(), raw_data, num_elements * sizeof(uint16_t));
185169
} else if (scales_cpu->dtype() == DataType::F32) {
186-
// 输入是 FP32,才需要转换
187-
// spdlog::info("Scales is FP32, converting to FP16");
188170
std::vector<float> scales_fp32(num_elements);
189171
std::memcpy(scales_fp32.data(), raw_data, num_elements * sizeof(float));
190172
scales_fp16 = ::float_to_fp16_bits(scales_fp32);
@@ -201,19 +183,15 @@ class GPTQ_QY : public BaseQuantization {
201183
target_device);
202184
}
203185

204-
// --------------------------------------------------------------------
205-
// 4. g_idx 处理
206-
// --------------------------------------------------------------------
207186
if (g_idx->numel() > 0) {
208187
g_idx_ = g_idx->to(target_device);
209188
}
210189

211190
converted_ = true;
212-
// spdlog::info("GPTQ_QY conversion completed successfully");
213191
}
214192

215193
void release_buffers() {
216-
converted_weight_ = Tensor(); // 赋值为空 Tensor,释放显存
194+
converted_weight_ = Tensor();
217195
converted_zeros_ = Tensor();
218196
converted_scales_ = Tensor();
219197
g_idx_ = Tensor();
@@ -226,16 +204,12 @@ class GPTQ_QY : public BaseQuantization {
226204
return;
227205
}
228206

229-
// 1. 执行转换(只读传入的原始数据)
230207
convert_from_gptq_w4a16(weight, zeros, scales, g_idx, target_device);
231208

232-
// 2. 转移所有权(Move 语义:converted_weight_ 的 impl_ 指针会置为 nullptr)
233-
// 原 weight/zeros/scales 持有的旧 shared_ptr 会被自动析构,释放显存
234209
weight = std::move(converted_weight_);
235210
zeros = std::move(converted_zeros_);
236211
scales = std::move(converted_scales_);
237212

238-
// 3. 清理内部状态
239213
converted_ = false;
240214
spdlog::debug("GPTQ_QY: Ownership transferred, internal buffers cleared.");
241215
}
@@ -343,4 +317,4 @@ class GPTQ_QY : public BaseQuantization {
343317
bool converted_ = false;
344318
};
345319

346-
} // namespace infinicore::quantization
320+
} // namespace infinicore::quantization

src/infinicore/nn/linear.cc

Lines changed: 0 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,6 @@
66
#include "infinicore/ops/linear_w4a16_awq.hpp"
77
#include "infinicore/ops/linear_w4a16_gptq_qy.hpp"
88
#include "infinicore/ops/linear_w8a8i8.hpp"
9-
#include <iostream>
109
#include <optional>
1110
#include <spdlog/spdlog.h>
1211

@@ -130,22 +129,6 @@ void BaseLinear::process_weights_after_loading() {
130129
assert(quantization_->get_quant_scheme() == infinicore::quantization::QuantScheme::GPTQ_W4A16_QY);
131130
}
132131
}
133-
// void BaseLinear::process_weights_after_loading() {
134-
// if (quantization_->get_quant_scheme() == infinicore::quantization::QuantScheme::GPTQ_W4A16 && device_.getType() == Device::Type::QY) {
135-
136-
// // 1. 创建目标量化器
137-
// auto config = quantization_->get_config();
138-
// auto gptq_qy = std::make_shared<infinicore::quantization::GPTQ_QY>(config);
139-
140-
// // 2. 🚀 一行完成:转换 + 所有权转移 + 旧显存释放
141-
// gptq_qy->convert_and_take_ownership(weight_, weight_zeros_, weight_scale_, gidx_, device_);
142-
143-
// // 3. 替换量化策略对象
144-
// quantization_ = std::move(gptq_qy);
145-
146-
// assert(quantization_->get_quant_scheme() == infinicore::quantization::QuantScheme::GPTQ_W4A16_QY);
147-
// }
148-
// }
149132
} // namespace infinicore::nn
150133

151134
namespace infinicore::nn {

src/infinicore/pybind11/tensor.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -81,4 +81,4 @@ inline void bind(py::module &m) {
8181
pybind11::arg("raw_ptr"), pybind11::arg("shape"), pybind11::arg("strides"), pybind11::arg("dtype"), pybind11::arg("device"));
8282
}
8383

84-
} // namespace infinicore::tensor
84+
} // namespace infinicore::tensor

0 commit comments

Comments
 (0)