ModelEngine-Group
diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml
Lines changed: 67 additions & 14 deletions
diff --git a/docker/Dockerfile.sglang_gpu b/docker/Dockerfile.sglang_gpu
Lines changed: 0 additions & 21 deletions
diff --git a/docker/Dockerfile.mindie_llm b/docker/Dockerfile.ucm-mindie-ascend.a2-v2
docker/Dockerfile.mindie_llm renamed to docker/Dockerfile.ucm-mindie-ascend.a2-v2
Lines changed: 15 additions & 3 deletions
diff --git a/docker/Dockerfile.ucm-sglang-cuda-v0.5.5 b/docker/Dockerfile.ucm-sglang-cuda-v0.5.5
Lines changed: 33 additions & 0 deletions
diff --git a/docker/Dockerfile.ucm-vllm-ascend.a2-latest b/docker/Dockerfile.ucm-vllm-ascend.a2-latest
Lines changed: 29 additions & 0 deletions
diff --git a/docker/Dockerfile.ucm-vllm-ascend.a2-v0.11.0 b/docker/Dockerfile.ucm-vllm-ascend.a2-v0.11.0
Lines changed: 37 additions & 0 deletions
diff --git a/docker/Dockerfile.ucm-vllm-ascend.a2-v0.17.0 b/docker/Dockerfile.ucm-vllm-ascend.a2-v0.17.0
Lines changed: 29 additions & 0 deletions
diff --git a/docker/Dockerfile.ucm-vllm-cuda-latest b/docker/Dockerfile.ucm-vllm-cuda-latest
Lines changed: 33 additions & 0 deletions
diff --git a/docker/Dockerfile.ucm-vllm-cuda-v0.11.0 b/docker/Dockerfile.ucm-vllm-cuda-v0.11.0
Lines changed: 32 additions & 0 deletions
@@ -73,8 +73,8 @@ jobs:
     needs: pre-check
     uses: ./.github/workflows/lint-and-test.yml
 
-  test-build-npu:
-    timeout-minutes: 15
+  test-build-vllm-ascend:
+    timeout-minutes: 25
     runs-on: ubuntu-24.04-arm
     steps:
       - uses: actions/checkout@v4
@@ -88,16 +88,17 @@ jobs:
         uses: docker/build-push-action@v5
         with:
           context: .
-          file: ./docker/Dockerfile.vllm_npu
-          build-args: "ENABLE_SPARSE=false"
+          file: ./docker/Dockerfile.ucm-vllm-ascend.a2-v0.17.0
+          build-args: |
+            PIP_INDEX_URL=https://pypi.org/simple
           tags: ucm-npu:latest
           push: false
           load: false
           cache-from: type=gha,scope=npu
           cache-to: type=gha,mode=max,scope=npu
 
-  test-build-npu-sparse:
-    timeout-minutes: 15
+  test-build-vllm-ascend-sparse:
+    timeout-minutes: 25
     runs-on: ubuntu-24.04-arm
     steps:
       - uses: actions/checkout@v4
@@ -111,14 +112,63 @@ jobs:
         uses: docker/build-push-action@v5
         with:
           context: .
-          file: ./docker/Dockerfile.vllm_npu_v0110
-          build-args: "ENABLE_SPARSE=true"
+          file: ./docker/Dockerfile.ucm-vllm-ascend.a2-v0.11.0
+          build-args: |
+            PIP_INDEX_URL=https://pypi.org/simple
           tags: ucm-npu-sparse:latest
           push: false
           load: false
           cache-from: type=gha,scope=npu
           cache-to: type=gha,mode=max,scope=npu
 
+  test-build-sglang:  # build-only check of the SGLang CUDA image
+    runs-on: ubuntu-24.04
+    timeout-minutes: 25
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # fetch full history
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build Image
+        uses: docker/build-push-action@v5
+        with:
+          file: ./docker/Dockerfile.ucm-sglang-cuda-v0.5.5
+          context: .
+          build-args: |
+            PIP_INDEX_URL=https://pypi.org/simple
+          tags: ucm-sglang:latest
+          load: false  # image is not loaded into the local daemon
+          push: false  # image is not pushed to a registry
+          cache-to: type=gha,mode=max,scope=gpu
+          cache-from: type=gha,scope=gpu
+
+  test-build-mindie:  # build-only check of the MindIE Ascend image
+    timeout-minutes: 25
+    runs-on: ubuntu-24.04-arm
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # fetch full history
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build Image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: ./docker/Dockerfile.ucm-mindie-ascend.a2-v2
+          build-args: |
+            PIP_INDEX_URL=https://pypi.org/simple
+          tags: ucm-mindie:latest
+          push: false  # image is not pushed to a registry
+          load: false  # image is not loaded into the local daemon
+          cache-from: type=gha,scope=mindie  # own scope; other image builds in this workflow use scope=npu
+          cache-to: type=gha,mode=max,scope=mindie  # a shared scope lets builds overwrite each other's cache
+
   test-e2e-pc-gpu:
     timeout-minutes: 25
     runs-on: ["gpu-test-in-docker"]
@@ -159,9 +209,10 @@ jobs:
         sudo -E docker build --network=host \
           --build-arg http_proxy="${http_proxy:-}" \
           --build-arg https_proxy="${https_proxy:-}" \
-          --build-arg ENABLE_SPARSE=false \
+          --build-arg no_proxy="repo.huaweicloud.com,${no_proxy:-}" \
+          --build-arg PIP_INDEX_URL=https://repo.huaweicloud.com/repository/pypi/simple \
           -t ucm-e2etest-online-inference:${{ steps.version.outputs.version }} \
-          -f ./docker/Dockerfile.vllm_gpu ./
+          -f ./docker/Dockerfile.ucm-vllm-cuda-v0.17.0 ./
     - name: Test E2E Online Inference in Docker
       run: |
         sudo chmod -R 777 /workspace/test_results/
@@ -228,9 +279,10 @@ jobs:
         sudo -E docker build --network=host \
           --build-arg http_proxy="${http_proxy:-}" \
           --build-arg https_proxy="${https_proxy:-}" \
-          --build-arg ENABLE_SPARSE=true \
+          --build-arg no_proxy="repo.huaweicloud.com,${no_proxy:-}" \
+          --build-arg PIP_INDEX_URL=https://repo.huaweicloud.com/repository/pypi/simple \
           -t ucm-e2etest-gpu-sparse:${{ steps.version.outputs.version }} \
-          -f ./docker/Dockerfile.vllm_gpu_v0110 ./
+          -f ./docker/Dockerfile.ucm-vllm-cuda-v0.11.0 ./
     - name: Test E2E in Docker
       run: |
         sudo chmod -R 777 /workspace/test_results/
@@ -296,9 +348,10 @@ jobs:
         sudo -E docker build --network=host \
           --build-arg http_proxy="${http_proxy:-}" \
           --build-arg https_proxy="${https_proxy:-}" \
-          --build-arg ENABLE_SPARSE=true \
+          --build-arg no_proxy="repo.huaweicloud.com,${no_proxy:-}" \
+          --build-arg PIP_INDEX_URL=https://repo.huaweicloud.com/repository/pypi/simple \
           -t ucm-e2etest-gpu-sparse:${{ steps.version.outputs.version }} \
-          -f ./docker/Dockerfile.vllm_gpu_v0110 ./
+          -f ./docker/Dockerfile.ucm-vllm-cuda-v0.11.0 ./
     - name: Test E2E in Docker
       run: |
         sudo chmod -R 777 /workspace/test_results/
 
@@ -1,11 +1,13 @@
 # Set to other image if needed
-ARG BASE_IMAGE="swr.cn-south-1.myhuaweicloud.com/ascendhub/mindie:2.3.0-800I-A2-py311-openeuler24.03-lts"
+ARG IMAGE_SOURCE="swr.cn-south-1.myhuaweicloud.com/ascendhub"
+ARG IMAGE_NAME_VERSION="mindie:2.3.0-800I-A2-py311-openeuler24.03-lts"
 
-FROM ${BASE_IMAGE}
+FROM ${IMAGE_SOURCE}/${IMAGE_NAME_VERSION}
 
 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG UCM_ENABLE_MINDIE=1
 ARG UCM_CXX11_ABI=1
+ARG INSTALL_MODE="source"
 
 WORKDIR /workspace
 
@@ -18,7 +20,17 @@ ENV PLATFORM=ascend \
     UCM_ENABLE_MINDIE=${UCM_ENABLE_MINDIE} \
     UCM_CXX11_ABI=${UCM_CXX11_ABI}
 
-RUN pip install -v -e /workspace/unified-cache-management --no-build-isolation
+# INSTALL_MODE=package links the copied tree as /workspace/package; any other value runs build_mindie.sh (presumably emitting the wheel there - confirm)
+RUN if [ "${INSTALL_MODE}" != "package" ]; then \
+        pip install --no-cache-dir build cmake && \
+        export WORKSPACE=/workspace SKIP_TAR=1 && \
+        bash /workspace/unified-cache-management/scripts/build_mindie.sh; \
+    else \
+        ln -s /workspace/unified-cache-management /workspace/package; \
+    fi
+
+# Install the UCM wheel from /workspace/package without leaving a pip cache in the layer
+RUN pip install --no-cache-dir /workspace/package/uc_manager-*.whl
 
 # Apply patch for MindIE
 RUN UCM_ENABLE_MINDIE=${UCM_ENABLE_MINDIE} python -c "import ucm.integration.mindie.patch.boot_patch as bp; bp.apply_patch(); print('[UCM][MindIE] patch applied (or already in place).')"
 
@@ -0,0 +1,33 @@
+# Base image; override IMAGE_SOURCE / IMAGE_NAME_VERSION via --build-arg to use another registry or tag
+ARG IMAGE_SOURCE="lmsysorg"
+ARG IMAGE_NAME_VERSION="sglang:v0.5.5.post3"
+
+FROM ${IMAGE_SOURCE}/${IMAGE_NAME_VERSION}
+
+ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
+ARG INSTALL_MODE="source"
+
+WORKDIR /workspace
+
+# Copy the build context into the image as the unified-cache-management source tree
+COPY . /workspace/unified-cache-management
+
+RUN pip config set global.index-url "${PIP_INDEX_URL}"
+
+# INSTALL_MODE=package links the copied tree as /workspace/package; any other value runs build_sglang.sh (presumably emitting the wheel and patch there - confirm)
+RUN if [ "${INSTALL_MODE}" != "package" ]; then \
+        pip install --no-cache-dir build cmake && \
+        export WORKSPACE=/workspace SKIP_TAR=1 && \
+        bash /workspace/unified-cache-management/scripts/build_sglang.sh; \
+    else \
+        ln -s /workspace/unified-cache-management /workspace/package; \
+    fi
+
+# Install the UCM wheel from /workspace/package without leaving a pip cache in the layer
+RUN pip install --no-cache-dir /workspace/package/uc_manager-*.whl
+
+# Apply the SGLang adaptation patch to the checkout under /sgl-workspace/sglang
+RUN cd /sgl-workspace/sglang \
+    && git apply /workspace/package/sglang-adapt.patch
+
+ENTRYPOINT ["/bin/bash"]
@@ -0,0 +1,29 @@
+# Base image; override IMAGE_SOURCE / IMAGE_NAME_VERSION via --build-arg to use another registry or tag
+ARG IMAGE_SOURCE="quay.io/ascend"
+ARG IMAGE_NAME_VERSION="vllm-ascend:nightly-main-openeuler"
+
+FROM ${IMAGE_SOURCE}/${IMAGE_NAME_VERSION}
+
+ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
+ARG INSTALL_MODE="source"
+
+WORKDIR /workspace
+
+# Copy the build context into the image as the unified-cache-management source tree
+COPY . /workspace/unified-cache-management
+
+RUN pip config set global.index-url "${PIP_INDEX_URL}"
+
+# INSTALL_MODE=package links the copied tree as /workspace/package; any other value runs build_ascend.sh with ENABLE_SPARSE=false (presumably emitting the wheel there - confirm)
+RUN if [ "${INSTALL_MODE}" != "package" ]; then \
+        pip install --no-cache-dir build cmake && \
+        export WORKSPACE=/workspace SKIP_TAR=1 ENABLE_SPARSE=false && \
+        bash /workspace/unified-cache-management/scripts/build_ascend.sh; \
+    else \
+        ln -s /workspace/unified-cache-management /workspace/package; \
+    fi
+
+# Install the UCM wheel from /workspace/package without leaving a pip cache in the layer
+RUN pip install --no-cache-dir /workspace/package/uc_manager-*.whl
+
+CMD ["/bin/bash"]
@@ -0,0 +1,37 @@
+# Base image; override IMAGE_SOURCE / IMAGE_NAME_VERSION via --build-arg to use another registry or tag
+ARG IMAGE_SOURCE="quay.io/ascend"
+ARG IMAGE_NAME_VERSION="vllm-ascend:v0.11.0"
+
+FROM ${IMAGE_SOURCE}/${IMAGE_NAME_VERSION}
+
+ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
+ARG INSTALL_MODE="source"
+
+# Apply the UCM monkey patch for vllm & vllm_ascend
+ENV ENABLE_UCM_PATCH=1
+
+WORKDIR /workspace
+
+# Copy the build context into the image as the unified-cache-management source tree
+COPY . /workspace/unified-cache-management
+
+RUN pip config set global.index-url "${PIP_INDEX_URL}"
+
+# INSTALL_MODE=package links the copied tree as /workspace/package; any other value runs build_ascend.sh (presumably emitting the wheel there - confirm)
+RUN if [ "${INSTALL_MODE}" != "package" ]; then \
+        pip install --no-cache-dir build cmake && \
+        export WORKSPACE=/workspace SKIP_TAR=1 && \
+        bash /workspace/unified-cache-management/scripts/build_ascend.sh; \
+    else \
+        ln -s /workspace/unified-cache-management /workspace/package; \
+    fi
+
+# Install the UCM wheel from /workspace/package without leaving a pip cache in the layer
+RUN pip install --no-cache-dir /workspace/package/uc_manager-*.whl
+
+# Run install_ascend_ops.sh from /workspace/package only when that script exists
+RUN if [ -f /workspace/package/install_ascend_ops.sh ]; then \
+        cd /workspace/package && bash install_ascend_ops.sh; \
+    fi
+
+CMD ["/bin/bash"]
@@ -0,0 +1,29 @@
+# Base image; override IMAGE_SOURCE / IMAGE_NAME_VERSION via --build-arg to use another registry or tag
+ARG IMAGE_SOURCE="quay.io/ascend"
+ARG IMAGE_NAME_VERSION="vllm-ascend:v0.17.0rc1"
+
+FROM ${IMAGE_SOURCE}/${IMAGE_NAME_VERSION}
+
+ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
+ARG INSTALL_MODE="source"
+
+WORKDIR /workspace
+
+# Copy the build context into the image as the unified-cache-management source tree
+COPY . /workspace/unified-cache-management
+
+RUN pip config set global.index-url "${PIP_INDEX_URL}"
+
+# INSTALL_MODE=package links the copied tree as /workspace/package; any other value runs build_ascend.sh with ENABLE_SPARSE=false (presumably emitting the wheel there - confirm)
+RUN if [ "${INSTALL_MODE}" != "package" ]; then \
+        pip install --no-cache-dir build cmake && \
+        export WORKSPACE=/workspace SKIP_TAR=1 ENABLE_SPARSE=false && \
+        bash /workspace/unified-cache-management/scripts/build_ascend.sh; \
+    else \
+        ln -s /workspace/unified-cache-management /workspace/package; \
+    fi
+
+# Install the UCM wheel from /workspace/package without leaving a pip cache in the layer
+RUN pip install --no-cache-dir /workspace/package/uc_manager-*.whl
+
+CMD ["/bin/bash"]
@@ -0,0 +1,33 @@
+# Base image; override IMAGE_SOURCE / IMAGE_NAME_VERSION via --build-arg to use another registry or tag
+ARG IMAGE_SOURCE="vllm"
+ARG IMAGE_NAME_VERSION="vllm-openai:nightly"
+
+FROM ${IMAGE_SOURCE}/${IMAGE_NAME_VERSION}
+
+ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
+ARG INSTALL_MODE="source"
+
+WORKDIR /workspace
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends git && \
+    rm -rf /var/lib/apt/lists/*
+
+# Copy the build context into the image as the unified-cache-management source tree
+COPY . /workspace/unified-cache-management
+
+RUN pip config set global.index-url "${PIP_INDEX_URL}"
+
+# INSTALL_MODE=package links the copied tree as /workspace/package; any other value runs build_cuda.sh with ENABLE_SPARSE=false (presumably emitting the wheel there - confirm)
+RUN if [ "${INSTALL_MODE}" != "package" ]; then \
+        pip install --no-cache-dir build cmake && \
+        export WORKSPACE=/workspace SKIP_TAR=1 ENABLE_SPARSE=false && \
+        bash /workspace/unified-cache-management/scripts/build_cuda.sh; \
+    else \
+        ln -s /workspace/unified-cache-management /workspace/package; \
+    fi
+
+# Install the UCM wheel from /workspace/package without leaving a pip cache in the layer
+RUN pip install --no-cache-dir /workspace/package/uc_manager-*.whl
+
+ENTRYPOINT ["/bin/bash"]
@@ -0,0 +1,32 @@
+# Base image; override IMAGE_SOURCE / IMAGE_NAME_VERSION via --build-arg to use another registry or tag
+ARG IMAGE_SOURCE="vllm"
+ARG IMAGE_NAME_VERSION="vllm-openai:v0.11.0"
+
+FROM ${IMAGE_SOURCE}/${IMAGE_NAME_VERSION}
+
+ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
+ARG INSTALL_MODE="source"
+
+# Apply the UCM monkey patch for vllm
+ENV ENABLE_UCM_PATCH=1
+
+WORKDIR /workspace
+
+# Copy the build context into the image as the unified-cache-management source tree
+COPY . /workspace/unified-cache-management
+
+RUN pip config set global.index-url "${PIP_INDEX_URL}"
+
+# INSTALL_MODE=package links the copied tree as /workspace/package; any other value runs build_cuda.sh (presumably emitting the wheel there - confirm)
+RUN if [ "${INSTALL_MODE}" != "package" ]; then \
+        pip install --no-cache-dir build cmake && \
+        export WORKSPACE=/workspace SKIP_TAR=1 && \
+        bash /workspace/unified-cache-management/scripts/build_cuda.sh; \
+    else \
+        ln -s /workspace/unified-cache-management /workspace/package; \
+    fi
+
+# Install the UCM wheel from /workspace/package without leaving a pip cache in the layer
+RUN pip install --no-cache-dir /workspace/package/uc_manager-*.whl
+
+ENTRYPOINT ["/bin/bash"]