Skip to content

Commit 89d89c2

Browse files
committed
merge main
2 parents a0ec686 + df9b428 commit 89d89c2

171 files changed

Lines changed: 6737 additions & 1748 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.claude/skills/check-env/SKILL.md

Lines changed: 0 additions & 41 deletions
This file was deleted.

.claude/skills/code-navigation/SKILL.md

Lines changed: 0 additions & 49 deletions
This file was deleted.
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
---
2+
name: docker-build
3+
description: Build an LMDeploy Docker image and push it to the inner registry.
4+
disable-model-invocation: true
5+
---
6+
7+
# Docker Build & Push
8+
9+
Build an LMDeploy Docker image and push it to the internal registry.
10+
11+
## Prerequisites
12+
13+
Before starting, verify all three environment variables are set:
14+
15+
```bash
16+
echo $LMDEPLOY_REGISTRY # inner registry server address
17+
echo $REGISTRY_USER # registry login username
18+
test -n "$REGISTRY_PASSWORD" && echo "<set>" || echo "<missing>" # registry login password
19+
```
20+
21+
If any are missing, stop and tell the user to set them before proceeding.
22+
23+
## 1. Determine image tag
24+
25+
```bash
26+
BRANCH=$(git branch --show-current | sed 's/[^a-zA-Z0-9._-]/-/g')
27+
SHA=$(git rev-parse --short=7 HEAD)
28+
TAG="${BRANCH}-${SHA}"
29+
IMAGE="${LMDEPLOY_REGISTRY}/lmdeploy:${TAG}"
30+
```
31+
32+
Print the computed image name so the user can confirm.
33+
34+
## 2. Build
35+
36+
Ask the user which build mode to use:
37+
38+
- **patch** (default) — uses `docker/Dockerfile_patch`, fast overlay on existing image
39+
- **full** — uses `docker/Dockerfile`, full multi-stage build from scratch
40+
41+
### Patch build (default)
42+
43+
```bash
44+
docker build -f docker/Dockerfile_patch \
45+
--build-arg BASE_IMAGE=openmmlab/lmdeploy:v0.12.3.post2-cu12.8 \
46+
--build-arg BACKEND=pytorch \
47+
--build-arg http_proxy=${http_proxy:-} \
48+
--build-arg https_proxy=${https_proxy:-} \
49+
--build-arg no_proxy=${no_proxy:-} \
50+
-t "${IMAGE}" \
51+
.
52+
```
53+
54+
The user can override:
55+
56+
- `BASE_IMAGE` — default `openmmlab/lmdeploy:v0.12.3.post2-cu12.8`
57+
- `BACKEND` — default `pytorch`; set to `turbomind` to include TurboMind C++ extension
58+
59+
### Full build
60+
61+
```bash
62+
docker build -f docker/Dockerfile \
63+
--build-arg CUDA_VERSION=cu12.8 \
64+
--build-arg http_proxy=${http_proxy:-} \
65+
--build-arg https_proxy=${https_proxy:-} \
66+
--build-arg no_proxy=${no_proxy:-} \
67+
-t "${IMAGE}" \
68+
.
69+
```
70+
71+
The user can override `CUDA_VERSION` (default: `cu12.8`).
72+
73+
### Verify
74+
75+
```bash
76+
docker images "${IMAGE}"
77+
```
78+
79+
## 3. Push
80+
81+
Skip this step if the user only wants a local build.
82+
83+
### Login
84+
85+
```bash
86+
echo "${REGISTRY_PASSWORD}" | docker login "${LMDEPLOY_REGISTRY}" -u "${REGISTRY_USER}" --password-stdin
87+
```
88+
89+
### Push
90+
91+
```bash
92+
docker push "${IMAGE}"
93+
```
94+
95+
Confirm success by checking that `docker push` exited with code 0.

.claude/skills/resolve-review/SKILL.md

Lines changed: 0 additions & 42 deletions
This file was deleted.

.claude/skills/submit-pr/SKILL.md

Lines changed: 0 additions & 59 deletions
This file was deleted.

.claude/skills/support-new-model/SKILL.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
---
22
name: support-new-model
33
description: Add a new LLM or VLM to LMDeploy's PyTorch backend.
4+
disable-model-invocation: true
45
---
56

67
# Tutorial: Adding a New Model to LMDeploy (PyTorch Backend)

.github/workflows/pr_ete_test.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ jobs:
8989
exit 1
9090
- name: Test restful server - turbomind InternVL3-38B
9191
run: |
92-
CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/OpenGVLab/InternVL3-38B --tp 2 --backend turbomind --logprobs-mode raw_logprobs --allow-terminate-by-client > ${{env.SERVER_LOG}}/turbomind_InternVL3-38B_start_restful.log 2>&1 &
92+
CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/OpenGVLab/InternVL3-38B --tp 2 --backend turbomind --logprobs-mode raw_logprobs --allow-terminate-by-client --trust-remote-code > ${{env.SERVER_LOG}}/turbomind_InternVL3-38B_start_restful.log 2>&1 &
9393
echo "restful_pid=$!"
9494
for i in $(seq 1 180)
9595
do
@@ -169,7 +169,7 @@ jobs:
169169
exit 1
170170
- name: Test restful server - pytorch InternVL3_5-30B-A3B
171171
run: |
172-
CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/OpenGVLab/InternVL3_5-30B-A3B --tp 2 --backend pytorch --logprobs-mode raw_logprobs --allow-terminate-by-client > ${{env.SERVER_LOG}}/pytorch_InternVL3_5-30B-A3B_start_restful.log 2>&1 &
172+
CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/OpenGVLab/InternVL3_5-30B-A3B --tp 2 --backend pytorch --logprobs-mode raw_logprobs --allow-terminate-by-client --trust-remote-code > ${{env.SERVER_LOG}}/pytorch_InternVL3_5-30B-A3B_start_restful.log 2>&1 &
173173
echo "restful_pid=$!"
174174
for i in $(seq 1 180)
175175
do

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ __pycache__/
99
# C extensions
1010
*.so
1111

12+
# skills
13+
.cursor/
14+
!.claude/skills/docker-build/
15+
!.claude/skills/docker-build/SKILL.md
16+
1217
# Distribution / packaging
1318
.Python
1419
triton-rerope/

CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,8 +263,11 @@ if(ARCH STREQUAL "x86_64")
263263
if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "12.8")
264264
list(APPEND CMAKE_CUDA_ARCHITECTURES 120a-real) # 5090
265265
endif ()
266+
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "12.8")
267+
list(APPEND CMAKE_CUDA_ARCHITECTURES 100a-real) # B200
268+
endif()
266269
if (MSVC)
267-
list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES 80-real 90a-real)
270+
list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES 80-real 90a-real 100a-real)
268271
endif ()
269272
endif ()
270273
elseif(ARCH STREQUAL "aarch64")

autotest/tools/pipeline/llm_case.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@ def run_pipeline_chat_test(model_path, run_config, cases_path, is_pr_test: bool
5454

5555
print('backend_config config: ' + str(backend_config))
5656
print('speculative_config config: ' + str(speculative_config))
57-
pipe = pipeline(model_path, backend_config=backend_config, speculative_config=speculative_config)
57+
pipe = pipeline(model_path, backend_config=backend_config, speculative_config=speculative_config,
58+
trust_remote_code=True)
5859

5960
cases_path = os.path.join(cases_path)
6061
with open(cases_path) as f:

0 commit comments

Comments
 (0)