Skip to content

Commit b1f6af3

Browse files
issue/263 fix T2-1-4
1 parent d1f29df commit b1f6af3

File tree

11 files changed

+692
-446
lines changed

11 files changed

+692
-446
lines changed

include/infinicore_infer/models/qwen3vl.h

Lines changed: 35 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ typedef struct {
2626
qwen3vl_load_layer_fn load_attn_k_norm;
2727
qwen3vl_load_layer_fn load_attn_qkv_proj;
2828
qwen3vl_load_layer_fn load_attn_o_proj;
29-
29+
3030
// MLP
3131
qwen3vl_load_layer_fn load_mlp_norm;
3232
qwen3vl_load_layer_fn load_mlp_gate_up;
@@ -46,27 +46,27 @@ typedef struct {
4646
qwen3vl_load_layer_fn load_attn_qkv_weight;
4747
qwen3vl_load_layer_fn load_attn_qkv_bias;
4848

49-
//block mlp
49+
// block mlp
5050
qwen3vl_load_layer_fn load_mlp_linear_fc1_weight;
5151
qwen3vl_load_layer_fn load_mlp_linear_fc1_bias;
5252
qwen3vl_load_layer_fn load_mlp_linear_fc2_weight;
5353
qwen3vl_load_layer_fn load_mlp_linear_fc2_bias;
5454

55-
//block norm
56-
qwen3vl_load_layer_fn load_norm1_weight;
57-
qwen3vl_load_layer_fn load_norm1_bias;
58-
qwen3vl_load_layer_fn load_norm2_weight;
59-
qwen3vl_load_layer_fn load_norm2_bias;
55+
// block norm
56+
qwen3vl_load_layer_fn load_norm1_weight;
57+
qwen3vl_load_layer_fn load_norm1_bias;
58+
qwen3vl_load_layer_fn load_norm2_weight;
59+
qwen3vl_load_layer_fn load_norm2_bias;
6060

61-
//deepstack_merger
61+
// deepstack_merger
6262
qwen3vl_load_layer_fn load_deepstack_merger_linear_fc1_weight;
6363
qwen3vl_load_layer_fn load_deepstack_merger_linear_fc1_bias;
6464
qwen3vl_load_layer_fn load_deepstack_merger_linear_fc2_weight;
6565
qwen3vl_load_layer_fn load_deepstack_merger_linear_fc2_bias;
6666
qwen3vl_load_layer_fn load_deepstack_merger_norm_weight;
6767
qwen3vl_load_layer_fn load_deepstack_merger_norm_bias;
6868

69-
//merger
69+
// merger
7070
qwen3vl_load_global_fn load_merger_linear_fc1_weight;
7171
qwen3vl_load_global_fn load_merger_linear_fc1_bias;
7272
qwen3vl_load_global_fn load_merger_linear_fc2_weight;
@@ -76,7 +76,7 @@ typedef struct {
7676

7777
} Qwen3vlVisWeightLoader;
7878

79-
typedef struct {
79+
typedef struct {
8080
Qwen3vlLangWeightLoader lang_loader;
8181
Qwen3vlVisWeightLoader vis_loader;
8282
} Qwen3vlWeightLoader;
@@ -116,7 +116,7 @@ typedef struct {
116116
} Qwen3vlVisMeta;
117117

118118
typedef struct {
119-
infiniDtype_t dtype; //INFINI_DTYPE_BF16
119+
infiniDtype_t dtype; // INFINI_DTYPE_BF16
120120

121121
Qwen3vlTextMeta text_meta;
122122
Qwen3vlVisMeta vis_meta;
@@ -132,29 +132,29 @@ typedef struct {
132132
/// @param device 协处理器种类
133133
/// @param ndev 协处理器数量
134134
/// @param dev_ids 协处理器编号,长度为 ndev
135-
__C __export struct Qwen3vlModel *
135+
__INFINI_C __export struct Qwen3vlModel *
136136
createQwen3vlModel(const Qwen3vlMeta *,
137-
const Qwen3vlWeights *);
137+
const Qwen3vlWeights *);
138138

139-
__C Qwen3vlWeights *
139+
__INFINI_C Qwen3vlWeights *
140140
createQwen3vlWeights(const Qwen3vlMeta *meta,
141-
infiniDevice_t device,
142-
int ndev,
143-
const int *dev_ids,
144-
bool transpose_weight);
141+
infiniDevice_t device,
142+
int ndev,
143+
const int *dev_ids,
144+
bool transpose_weight);
145145

146-
__C __export Qwen3vlWeightLoader *
146+
__INFINI_C __export Qwen3vlWeightLoader *
147147
createQwen3vlWeightLoader();
148148

149149
/// @brief 销毁模型
150-
__C __export void destroyQwen3vlModel(struct Qwen3vlModel *);
150+
__INFINI_C __export void destroyQwen3vlModel(struct Qwen3vlModel *);
151151

152-
__C __export struct Qwen3vlCache *
152+
__INFINI_C __export struct Qwen3vlCache *
153153
createQwen3vlCache(const struct Qwen3vlModel *);
154154

155-
__C __export void
155+
__INFINI_C __export void
156156
dropQwen3vlCache(const struct Qwen3vlModel *,
157-
struct Qwen3vlCache *);
157+
struct Qwen3vlCache *);
158158

159159
/// @brief 批次推理一轮,并采样出新的 token
160160
/// @param tokens 输入 token 地址
@@ -167,18 +167,18 @@ dropQwen3vlCache(const struct Qwen3vlModel *,
167167
/// @param topk 采样 topk(1 表示贪心采样)
168168
/// @param topp 采样 topp
169169
/// @param output 输出 token 数组,每个请求一个输出,长度至少为nreq
170-
__C __export void
170+
__INFINI_C __export void
171171
inferBatchQwen3vl(struct Qwen3vlModel *,
172-
const uint32_t *tokens, uint32_t ntok,
173-
void *pixel_values, uint32_t total_patches,
174-
uint32_t *image_grid_thw, uint32_t num_images,
175-
void *pixel_values_videos, uint32_t total_patches_videos,
176-
uint32_t *video_grid_thw, uint32_t num_videos,
177-
uint32_t patch_features,
178-
const uint32_t *req_lens, uint32_t nreq, const uint32_t *req_pos,
179-
struct Qwen3vlCache **caches,
180-
const float *temperature, const uint32_t *topk, const float *topp,
181-
uint32_t *output);
172+
const uint32_t *tokens, uint32_t ntok,
173+
void *pixel_values, uint32_t total_patches,
174+
uint32_t *image_grid_thw, uint32_t num_images,
175+
void *pixel_values_videos, uint32_t total_patches_videos,
176+
uint32_t *video_grid_thw, uint32_t num_videos,
177+
uint32_t patch_features,
178+
const uint32_t *req_lens, uint32_t nreq, const uint32_t *req_pos,
179+
struct Qwen3vlCache **caches,
180+
const float *temperature, const uint32_t *topk, const float *topp,
181+
uint32_t *output);
182182

183183
/// @brief 批次推理一轮,输出 output embedding 后的 logits
184184
/// @param tokens 输入 token 地址
@@ -188,7 +188,7 @@ inferBatchQwen3vl(struct Qwen3vlModel *,
188188
/// @param req_pos 每个请求的起始位置
189189
/// @param kv_caches 每个请求的 KV Cache
190190
/// @param logits 输出 token 数组,每个请求一个输出,长度至少为nreq
191-
__C __export void
191+
__INFINI_C __export void
192192
forwardBatchQwen3vl(struct Qwen3vlModel *,
193193
const uint32_t *tokens, uint32_t ntok,
194194
void *pixel_values, uint32_t total_patches,

scripts/libinfinicore_infer/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
DeepSeekV3MetaCStruct,
77
DeepSeekV3WeightsCStruct,
88
DeepSeekV3WeightLoaderCStruct,
9+
DeepSeekV3CacheCStruct,
910
)
1011
from .qwen3vl import (
1112
Qwen3vlModel,
@@ -33,6 +34,7 @@
3334
"DeepSeekV3MetaCStruct",
3435
"DeepSeekV3WeightsCStruct",
3536
"DeepSeekV3WeightLoaderCStruct",
37+
"DeepSeekV3CacheCStruct",
3638
"Qwen3vlModel",
3739
"Qwen3vlMetaCStruct",
3840
"TextMetaCStruct",

scripts/libinfinicore_infer/qwen3vl.py

Lines changed: 22 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -183,15 +183,15 @@ def register_lib(cls, lib):
183183
POINTER(Qwen3vlModelCStruct),
184184
POINTER(c_uint),
185185
c_uint,
186-
c_void_p, # pixel_values,
187-
c_uint, # total_patches,
188-
POINTER(c_uint), # image_grid_thw,
189-
c_uint, # num_images,
190-
c_void_p, # pixel_values_videos,
191-
c_uint, # total_patches_videos,
192-
POINTER(c_uint), # video_grid_thw,
193-
c_uint, # num_videos,
194-
c_uint, # patch_features,
186+
c_void_p, # pixel_values,
187+
c_uint, # total_patches,
188+
POINTER(c_uint), # image_grid_thw,
189+
c_uint, # num_images,
190+
c_void_p, # pixel_values_videos,
191+
c_uint, # total_patches_videos,
192+
POINTER(c_uint), # video_grid_thw,
193+
c_uint, # num_videos,
194+
c_uint, # patch_features,
195195
POINTER(c_uint),
196196
c_uint,
197197
POINTER(c_uint),
@@ -206,15 +206,15 @@ def register_lib(cls, lib):
206206
POINTER(Qwen3vlModelCStruct),
207207
POINTER(c_uint),
208208
c_uint,
209-
c_void_p, # pixel_values,
210-
c_uint, # total_patches,
211-
POINTER(c_uint), # image_grid_thw,
212-
c_uint, # num_images,
213-
c_void_p, # pixel_values_videos,
214-
c_uint, # total_patches_videos,
215-
POINTER(c_uint), # video_grid_thw,
216-
c_uint, # num_videos,
217-
c_uint, # patch_features,
209+
c_void_p, # pixel_values,
210+
c_uint, # total_patches,
211+
POINTER(c_uint), # image_grid_thw,
212+
c_uint, # num_images,
213+
c_void_p, # pixel_values_videos,
214+
c_uint, # total_patches_videos,
215+
POINTER(c_uint), # video_grid_thw,
216+
c_uint, # num_videos,
217+
c_uint, # patch_features,
218218
POINTER(c_uint),
219219
c_uint,
220220
POINTER(c_uint),
@@ -226,7 +226,9 @@ def create_weight_loader(self):
226226
return self.lib.createQwen3vlWeightLoader()
227227

228228
def create_weights(self, meta, device_type, ndev, dev_ids, transpose_weight):
229-
return self.lib.createQwen3vlWeights(meta, device_type, ndev, dev_ids, transpose_weight)
229+
return self.lib.createQwen3vlWeights(
230+
meta, device_type, ndev, dev_ids, transpose_weight
231+
)
230232

231233
def create_model(self, meta, weights):
232234
return self.lib.createQwen3vlModel(meta, weights)
@@ -324,4 +326,4 @@ def forward_batch(
324326
req_pos,
325327
caches,
326328
logits,
327-
)
329+
)

0 commit comments

Comments
 (0)