@@ -26,7 +26,7 @@ typedef struct {
2626 qwen3vl_load_layer_fn load_attn_k_norm ;
2727 qwen3vl_load_layer_fn load_attn_qkv_proj ;
2828 qwen3vl_load_layer_fn load_attn_o_proj ;
29-
29+
3030 // MLP
3131 qwen3vl_load_layer_fn load_mlp_norm ;
3232 qwen3vl_load_layer_fn load_mlp_gate_up ;
@@ -46,27 +46,27 @@ typedef struct {
4646 qwen3vl_load_layer_fn load_attn_qkv_weight ;
4747 qwen3vl_load_layer_fn load_attn_qkv_bias ;
4848
49- //block mlp
49+ // block mlp
5050 qwen3vl_load_layer_fn load_mlp_linear_fc1_weight ;
5151 qwen3vl_load_layer_fn load_mlp_linear_fc1_bias ;
5252 qwen3vl_load_layer_fn load_mlp_linear_fc2_weight ;
5353 qwen3vl_load_layer_fn load_mlp_linear_fc2_bias ;
5454
55- //block norm
56- qwen3vl_load_layer_fn load_norm1_weight ;
57- qwen3vl_load_layer_fn load_norm1_bias ;
58- qwen3vl_load_layer_fn load_norm2_weight ;
59- qwen3vl_load_layer_fn load_norm2_bias ;
55+ // block norm
56+ qwen3vl_load_layer_fn load_norm1_weight ;
57+ qwen3vl_load_layer_fn load_norm1_bias ;
58+ qwen3vl_load_layer_fn load_norm2_weight ;
59+ qwen3vl_load_layer_fn load_norm2_bias ;
6060
61- //deepstack_merger
61+ // deepstack_merger
6262 qwen3vl_load_layer_fn load_deepstack_merger_linear_fc1_weight ;
6363 qwen3vl_load_layer_fn load_deepstack_merger_linear_fc1_bias ;
6464 qwen3vl_load_layer_fn load_deepstack_merger_linear_fc2_weight ;
6565 qwen3vl_load_layer_fn load_deepstack_merger_linear_fc2_bias ;
6666 qwen3vl_load_layer_fn load_deepstack_merger_norm_weight ;
6767 qwen3vl_load_layer_fn load_deepstack_merger_norm_bias ;
6868
69- //merger
69+ // merger
7070 qwen3vl_load_global_fn load_merger_linear_fc1_weight ;
7171 qwen3vl_load_global_fn load_merger_linear_fc1_bias ;
7272 qwen3vl_load_global_fn load_merger_linear_fc2_weight ;
@@ -76,7 +76,7 @@ typedef struct {
7676
7777} Qwen3vlVisWeightLoader ;
7878
79- typedef struct {
79+ typedef struct {
8080 Qwen3vlLangWeightLoader lang_loader ;
8181 Qwen3vlVisWeightLoader vis_loader ;
8282} Qwen3vlWeightLoader ;
@@ -116,7 +116,7 @@ typedef struct {
116116} Qwen3vlVisMeta ;
117117
118118typedef struct {
119- infiniDtype_t dtype ; //INFINI_DTYPE_BF16
119+ infiniDtype_t dtype ; // INFINI_DTYPE_BF16
120120
121121 Qwen3vlTextMeta text_meta ;
122122 Qwen3vlVisMeta vis_meta ;
@@ -132,29 +132,29 @@ typedef struct {
132132/// @param device 协处理器种类
133133/// @param ndev 协处理器数量
134134/// @param dev_ids 协处理器编号,长度为 ndev
135- __C __export struct Qwen3vlModel *
135+ __INFINI_C __export struct Qwen3vlModel *
136136createQwen3vlModel (const Qwen3vlMeta * ,
137- const Qwen3vlWeights * );
137+ const Qwen3vlWeights * );
138138
139- __C Qwen3vlWeights *
139+ __INFINI_C Qwen3vlWeights *
140140createQwen3vlWeights (const Qwen3vlMeta * meta ,
141- infiniDevice_t device ,
142- int ndev ,
143- const int * dev_ids ,
144- bool transpose_weight );
141+ infiniDevice_t device ,
142+ int ndev ,
143+ const int * dev_ids ,
144+ bool transpose_weight );
145145
146- __C __export Qwen3vlWeightLoader *
146+ __INFINI_C __export Qwen3vlWeightLoader *
147147createQwen3vlWeightLoader ();
148148
149149/// @brief 销毁模型
150- __C __export void destroyQwen3vlModel (struct Qwen3vlModel * );
150+ __INFINI_C __export void destroyQwen3vlModel (struct Qwen3vlModel * );
151151
152- __C __export struct Qwen3vlCache *
152+ __INFINI_C __export struct Qwen3vlCache *
153153createQwen3vlCache (const struct Qwen3vlModel * );
154154
155- __C __export void
155+ __INFINI_C __export void
156156dropQwen3vlCache (const struct Qwen3vlModel * ,
157- struct Qwen3vlCache * );
157+ struct Qwen3vlCache * );
158158
159159/// @brief 批次推理一轮,并采样出新的 token
160160/// @param tokens 输入 token 地址
@@ -167,18 +167,18 @@ dropQwen3vlCache(const struct Qwen3vlModel *,
167167/// @param topk 采样 topk(1 表示贪心采样)
168168/// @param topp 采样 topp
169169/// @param output 输出 token 数组,每个请求一个输出,长度至少为nreq
170- __C __export void
170+ __INFINI_C __export void
171171inferBatchQwen3vl (struct Qwen3vlModel * ,
172- const uint32_t * tokens , uint32_t ntok ,
173- void * pixel_values , uint32_t total_patches ,
174- uint32_t * image_grid_thw , uint32_t num_images ,
175- void * pixel_values_videos , uint32_t total_patches_videos ,
176- uint32_t * video_grid_thw , uint32_t num_videos ,
177- uint32_t patch_features ,
178- const uint32_t * req_lens , uint32_t nreq , const uint32_t * req_pos ,
179- struct Qwen3vlCache * * caches ,
180- const float * temperature , const uint32_t * topk , const float * topp ,
181- uint32_t * output );
172+ const uint32_t * tokens , uint32_t ntok ,
173+ void * pixel_values , uint32_t total_patches ,
174+ uint32_t * image_grid_thw , uint32_t num_images ,
175+ void * pixel_values_videos , uint32_t total_patches_videos ,
176+ uint32_t * video_grid_thw , uint32_t num_videos ,
177+ uint32_t patch_features ,
178+ const uint32_t * req_lens , uint32_t nreq , const uint32_t * req_pos ,
179+ struct Qwen3vlCache * * caches ,
180+ const float * temperature , const uint32_t * topk , const float * topp ,
181+ uint32_t * output );
182182
183183/// @brief 批次推理一轮,输出 output embedding 后的 logits
184184/// @param tokens 输入 token 地址
@@ -188,7 +188,7 @@ inferBatchQwen3vl(struct Qwen3vlModel *,
188188/// @param req_pos 每个请求的起始位置
189189/// @param kv_caches 每个请求的 KV Cache
190190/// @param logits 输出 token 数组,每个请求一个输出,长度至少为nreq
191- __C __export void
191+ __INFINI_C __export void
192192forwardBatchQwen3vl (struct Qwen3vlModel * ,
193193 const uint32_t * tokens , uint32_t ntok ,
194194 void * pixel_values , uint32_t total_patches ,
0 commit comments