grimoire
diff --git a/‎.claude/skills/check-env/SKILL.md‎
Lines changed: 0 additions & 41 deletions b/‎.claude/skills/check-env/SKILL.md‎
Lines changed: 0 additions & 41 deletions
diff --git a/‎.claude/skills/code-navigation/SKILL.md‎
Lines changed: 0 additions & 49 deletions b/‎.claude/skills/code-navigation/SKILL.md‎
Lines changed: 0 additions & 49 deletions
diff --git a/‎.claude/skills/docker-build/SKILL.md‎
Lines changed: 95 additions & 0 deletions b/‎.claude/skills/docker-build/SKILL.md‎
Lines changed: 95 additions & 0 deletions
diff --git a/‎.claude/skills/resolve-review/SKILL.md‎
Lines changed: 0 additions & 42 deletions b/‎.claude/skills/resolve-review/SKILL.md‎
Lines changed: 0 additions & 42 deletions
diff --git a/‎.claude/skills/submit-pr/SKILL.md‎
Lines changed: 0 additions & 59 deletions b/‎.claude/skills/submit-pr/SKILL.md‎
Lines changed: 0 additions & 59 deletions
diff --git a/‎.claude/skills/support-new-model/SKILL.md‎
Lines changed: 1 addition & 0 deletions b/‎.claude/skills/support-new-model/SKILL.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/pr_ete_test.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/pr_ete_test.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎.gitignore‎
Lines changed: 5 additions & 0 deletions b/‎.gitignore‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎CMakeLists.txt‎
Lines changed: 4 additions & 1 deletion b/‎CMakeLists.txt‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎autotest/tools/pipeline/llm_case.py‎
Lines changed: 2 additions & 1 deletion b/‎autotest/tools/pipeline/llm_case.py‎
Lines changed: 2 additions & 1 deletion
@@ -0,0 +1,95 @@
+---
+name: docker-build
+description: Build an LMDeploy Docker image and push it to the inner registry.
+disable-model-invocation: true
+---
+
+# Docker Build & Push
+
+Build an LMDeploy Docker image tagged `<branch>-<short-sha>` and, optionally, push it to the internal registry given by `$LMDEPLOY_REGISTRY`.
+
+## Prerequisites
+
+Before starting, verify all three environment variables are set:
+
+```bash
+echo $LMDEPLOY_REGISTRY    # inner registry server address
+echo $REGISTRY_USER        # registry login username
+test -n "$REGISTRY_PASSWORD" && echo "<set>" || echo "<missing>"  # registry login password
+```
+
+If any are missing (an `echo` prints an empty line, or the password check prints `<missing>`), stop and tell the user to set them before proceeding.
+
+## 1. Determine image tag
+
+```bash
+BRANCH=$(git branch --show-current | sed 's/[^a-zA-Z0-9._-]/-/g')
+SHA=$(git rev-parse --short=7 HEAD)
+TAG="${BRANCH}-${SHA}"
+IMAGE="${LMDEPLOY_REGISTRY}/lmdeploy:${TAG}"
+```
+
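+Print the computed image name so the user can confirm. If `BRANCH` is empty (detached HEAD: `git branch --show-current` prints nothing), the tag would start with `-`, which Docker rejects; ask the user for a tag prefix before building.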
+
+## 2. Build
+
+Ask the user which build mode:
+
+- **patch** (default) — uses `docker/Dockerfile_patch`, a fast overlay on top of an existing image
+- **full** — uses `docker/Dockerfile`, full multi-stage build from scratch
+
+### Patch build (default)
+
+```bash
+docker build -f docker/Dockerfile_patch \
+  --build-arg BASE_IMAGE=openmmlab/lmdeploy:v0.12.3.post2-cu12.8 \
+  --build-arg BACKEND=pytorch \
+  --build-arg http_proxy=${http_proxy:-} \
+  --build-arg https_proxy=${https_proxy:-} \
+  --build-arg no_proxy=${no_proxy:-} \
+  -t "${IMAGE}" \
+  .
+```
+
+User can override:
+
+- `BASE_IMAGE` — default `openmmlab/lmdeploy:v0.12.3.post2-cu12.8`
+- `BACKEND` — default `pytorch`; set to `turbomind` to include TurboMind C++ extension
+
+### Full build
+
+```bash
+docker build -f docker/Dockerfile \
+  --build-arg CUDA_VERSION=cu12.8 \
+  --build-arg http_proxy=${http_proxy:-} \
+  --build-arg https_proxy=${https_proxy:-} \
+  --build-arg no_proxy=${no_proxy:-} \
+  -t "${IMAGE}" \
+  .
+```
+
+User can override `CUDA_VERSION` — default `cu12.8`.
+
+### Verify
+
+```bash
+docker images "${IMAGE}"
+```
+
+## 3. Push
+
+Skip this step if the user only wants a local build.
+
+### Login
+
+```bash
+echo "${REGISTRY_PASSWORD}" | docker login "${LMDEPLOY_REGISTRY}" -u "${REGISTRY_USER}" --password-stdin
+```
+
+### Push
+
+```bash
+docker push "${IMAGE}"
+```
+
+Confirm success by checking that `docker push` exited with status 0.
@@ -1,6 +1,7 @@
 ---
 name: support-new-model
 description: Add a new LLM or VLM to LMDeploy's PyTorch backend.
+disable-model-invocation: true
 ---
 
 # Tutorial: Adding a New Model to LMDeploy (PyTorch Backend)
 
@@ -89,7 +89,7 @@ jobs:
           exit 1
       - name: Test restful server - turbomind InternVL3-38B
         run: |
-          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/OpenGVLab/InternVL3-38B --tp 2 --backend turbomind --logprobs-mode raw_logprobs --allow-terminate-by-client > ${{env.SERVER_LOG}}/turbomind_InternVL3-38B_start_restful.log 2>&1 &
+          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/OpenGVLab/InternVL3-38B --tp 2 --backend turbomind --logprobs-mode raw_logprobs --allow-terminate-by-client --trust-remote-code > ${{env.SERVER_LOG}}/turbomind_InternVL3-38B_start_restful.log 2>&1 &
           echo "restful_pid=$!"
           for i in $(seq 1 180)
           do
@@ -169,7 +169,7 @@ jobs:
           exit 1
       - name: Test restful server - pytorch InternVL3_5-30B-A3B
         run: |
-          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/OpenGVLab/InternVL3_5-30B-A3B --tp 2 --backend pytorch --logprobs-mode raw_logprobs  --allow-terminate-by-client > ${{env.SERVER_LOG}}/pytorch_InternVL3_5-30B-A3B_start_restful.log 2>&1 &
+          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/OpenGVLab/InternVL3_5-30B-A3B --tp 2 --backend pytorch --logprobs-mode raw_logprobs  --allow-terminate-by-client --trust-remote-code > ${{env.SERVER_LOG}}/pytorch_InternVL3_5-30B-A3B_start_restful.log 2>&1 &
           echo "restful_pid=$!"
           for i in $(seq 1 180)
           do
 
@@ -9,6 +9,11 @@ __pycache__/
 # C extensions
 *.so
 
+# skills
+.cursor/
+!.claude/skills/docker-build/
+!.claude/skills/docker-build/SKILL.md
+
 # Distribution / packaging
 .Python
 triton-rerope/
 
@@ -263,8 +263,11 @@ if(ARCH STREQUAL "x86_64")
     if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "12.8")
       list(APPEND CMAKE_CUDA_ARCHITECTURES 120a-real) # 5090
     endif ()
+    if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "12.8")
+      list(APPEND CMAKE_CUDA_ARCHITECTURES 100a-real) # B200
+    endif()
     if (MSVC)
-      list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES 80-real 90a-real)
+      list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES 80-real 90a-real 100a-real)
     endif ()
   endif ()
 elseif(ARCH STREQUAL "aarch64")
 
@@ -54,7 +54,8 @@ def run_pipeline_chat_test(model_path, run_config, cases_path, is_pr_test: bool
 
     print('backend_config config: ' + str(backend_config))
     print('speculative_config config: ' + str(speculative_config))
-    pipe = pipeline(model_path, backend_config=backend_config, speculative_config=speculative_config)
+    pipe = pipeline(model_path, backend_config=backend_config, speculative_config=speculative_config,
+                    trust_remote_code=True)
 
     cases_path = os.path.join(cases_path)
     with open(cases_path) as f: