Skip to content

Commit 91b9bc0

Browse files
iosmers and guanshihui
authored and committed
[XPU][Docs] Update Release2.5 Note (PaddlePaddle#7187)
* update docs * update * update
1 parent bb1f977 commit 91b9bc0

7 files changed

Lines changed: 1133 additions & 115 deletions

File tree

custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length.cu

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ __global__ void speculate_limit_thinking_content_length_kernel(
3434
int64_t* next_tokens, // [bs, tokens_per_step]
3535
const int* max_think_lens, // [bs]
3636
int* max_reply_lens, // [bs]
37-
int64_t* step_idx, // [bs]
37+
const int64_t* step_idx, // [bs]
3838
const int64_t* eos_token_ids, // [eos_len]
3939
int* limit_status, // [bs]
4040
int* accept_num, // [bs]
@@ -68,7 +68,7 @@ __global__ void speculate_limit_thinking_content_length_kernel(
6868
int new_accept_num = original_accept_num;
6969

7070
// 本 step 的 token offset 对应的绝对 step
71-
const int64_t current_base_step = step_idx[bid] - original_accept_num + 1;
71+
const int64_t current_base_step = step_idx[bid] + 1;
7272

7373
for (int token_offset = 0; token_offset < original_accept_num;
7474
token_offset++) {
@@ -100,8 +100,8 @@ __global__ void speculate_limit_thinking_content_length_kernel(
100100
// inject_token_ids[0])
101101
if (status == 0 &&
102102
(current_step - 1) ==
103-
max_think_len) { // current_step - 1 是因为 speculate_verify 里
104-
// step_idx + 1 了
103+
max_think_len) { // current_step - 1 : 已输出 current_step-1
104+
// 个thinking token
105105
status = (inject_len > 0) ? 1 : done_status;
106106
}
107107
} else if (max_think_len == 0) {
@@ -181,13 +181,6 @@ __global__ void speculate_limit_thinking_content_length_kernel(
181181
}
182182
}
183183

184-
// 更新 step_idx / accept_num(被截断的 token 需要回退
185-
// step_idx)
186-
const int discarded_tokens = original_accept_num - new_accept_num;
187-
if (discarded_tokens > 0) {
188-
step_idx[bid] -= discarded_tokens;
189-
}
190-
191184
accept_num[bid] = new_accept_num;
192185
limit_status[bid] = status;
193186
max_reply_lens[bid] = max_reply_len;
@@ -221,7 +214,7 @@ void SpeculateLimitThinkingContentLength(
221214
const_cast<int64_t*>(next_tokens.data<int64_t>()),
222215
max_think_lens.data<int>(),
223216
const_cast<int*>(max_reply_lens.data<int>()),
224-
const_cast<int64_t*>(step_idx.data<int64_t>()),
217+
step_idx.data<int64_t>(),
225218
eos_token_ids.data<int64_t>(),
226219
const_cast<int*>(limit_status.data<int>()),
227220
const_cast<int*>(accept_num.data<int>()),

custom_ops/gpu_ops/speculate_decoding/speculate_set_stop_value_multi_seqs.cu

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,17 +51,18 @@ __global__ void spec_set_value_by_stop_seqs(bool *stop_flags,
5151
const int64_t step_idx_now = step_idx[bid];
5252
const int64_t min_token_limit = min_tokens[bid];
5353

54-
const bool can_stop = (step_idx_now >= min_token_limit);
54+
const bool can_stop = (step_idx_now + accept_num >= min_token_limit);
5555
if (!can_stop) return;
5656
if (!stop_flags[bid]) {
5757
int accept_idx = 0;
5858
bool is_end = false;
59+
5960
// 遍历起始位置
6061
for (; accept_idx <= accept_num - 1 && !is_end; accept_idx++) {
6162
if (step_idx_now + accept_idx + 1 < stop_seq_len) {
6263
#ifdef DEBUG_SPEC_STOP_SEQS
6364
printf("num %d < stop_seq_len %d\n",
64-
step_idx_now - accept_num + accept_idx + 1,
65+
step_idx_now + accept_idx + 1,
6566
stop_seq_len);
6667
#endif
6768
continue;
@@ -71,7 +72,7 @@ __global__ void spec_set_value_by_stop_seqs(bool *stop_flags,
7172
int64_t cur_token_idx = -1;
7273

7374
// 通过当前值判断 token 是在 pre_ids 还是 accept_token 里
74-
if (stop_seq_len - 1 - i < accept_idx) {
75+
if (stop_seq_len - 1 - i <= accept_idx) {
7576
#ifdef DEBUG_SPEC_STOP_SEQS
7677
printf(
7778
"AcceptTokens bid:%d. tid:%d, accept_idx:%d, "
@@ -80,10 +81,10 @@ __global__ void spec_set_value_by_stop_seqs(bool *stop_flags,
8081
bid,
8182
tid,
8283
accept_idx,
83-
accept_idx - (stop_seq_len - 1 - i) - 1);
84+
accept_idx - (stop_seq_len - 1 - i));
8485
#endif
8586
cur_token_idx =
86-
accept_tokens_now[accept_idx - (stop_seq_len - 1 - i) - 1];
87+
accept_tokens_now[accept_idx - (stop_seq_len - 1 - i)];
8788
} else {
8889
#ifdef DEBUG_SPEC_STOP_SEQS
8990
printf(
@@ -94,8 +95,7 @@ __global__ void spec_set_value_by_stop_seqs(bool *stop_flags,
9495
tid,
9596
step_idx_now,
9697
accept_idx,
97-
step_idx_now - accept_num + accept_idx -
98-
(stop_seq_len - 1 - i));
98+
step_idx_now + accept_idx - (stop_seq_len - 1 - i));
9999
#endif
100100
int pre_ids_idx =
101101
step_idx_now + accept_idx - (stop_seq_len - 1 - i);

docs/get_started/installation/kunlunxin_xpu.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@ Verified platform:
2828
```bash
2929
mkdir Work
3030
cd Work
31-
docker pull ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:2.4.0
31+
docker pull ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:2.5.0
3232
docker run --name fastdeploy-xpu --net=host -itd --privileged -v $PWD:/Work -w /Work \
33-
ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:2.4.0 \
33+
ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:2.5.0 \
3434
/bin/bash
3535
docker exec -it fastdeploy-xpu /bin/bash
3636
```
@@ -40,7 +40,7 @@ docker exec -it fastdeploy-xpu /bin/bash
4040
### Install PaddlePaddle
4141

4242
```bash
43-
python -m pip install paddlepaddle-xpu==3.3.0 -i https://www.paddlepaddle.org.cn/packages/stable/xpu-p800/
43+
python -m pip install paddlepaddle-xpu==3.3.1 -i https://www.paddlepaddle.org.cn/packages/stable/xpu-p800/
4444
```
4545

4646
Alternatively, you can install the latest version of PaddlePaddle (Not recommended)
@@ -52,7 +52,7 @@ python -m pip install --pre paddlepaddle-xpu -i https://www.paddlepaddle.org.cn/
5252
### Install FastDeploy (**Do NOT install via PyPI source**)
5353

5454
```bash
55-
python -m pip install fastdeploy-xpu==2.4.0 -i https://www.paddlepaddle.org.cn/packages/stable/fastdeploy-xpu-p800/ --extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
55+
python -m pip install fastdeploy-xpu==2.5.0 -i https://www.paddlepaddle.org.cn/packages/stable/fastdeploy-xpu-p800/ --extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
5656
```
5757

5858
Alternatively, you can install the latest version of FastDeploy (Not recommended)
@@ -66,7 +66,7 @@ python -m pip install --pre fastdeploy-xpu -i https://www.paddlepaddle.org.cn/pa
6666
### Install PaddlePaddle
6767

6868
```bash
69-
python -m pip install paddlepaddle-xpu==3.3.0 -i https://www.paddlepaddle.org.cn/packages/stable/xpu-p800/
69+
python -m pip install paddlepaddle-xpu==3.3.1 -i https://www.paddlepaddle.org.cn/packages/stable/xpu-p800/
7070
```
7171

7272
Alternatively, you can install the latest version of PaddlePaddle (Not recommended)

0 commit comments

Comments
 (0)