Skip to content

Commit ae2cc13

Browse files
authored
[Feat] Logger redesign (#1015)
## Purpose The current logger has a self-reference problem that causes the NPU to run out of memory, and the logger output is incomplete after vLLM has stopped. ## Modifications Refactor the logger: the Python side uses the standard logging library, with a handler added to redirect records to the C++ spdlog backend. Use an async logger for better performance. Do not create empty log files, and keep all log files in the same directory for a better user experience. Capture vLLM logs by adding a provider to it instead of hooking the vLLM logger. ## Test Verified using the Jenkins pipeline; works as expected, with no performance impact.
1 parent 8c002ad commit ae2cc13

13 files changed

Lines changed: 397 additions & 246 deletions

.github/workflows/pull-request.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ jobs:
101101
cache-from: type=gha,scope=npu
102102
cache-to: type=gha,mode=max,scope=npu,ignore-error=true
103103

104-
test-build-vllm-ascend-v0-20-2:
104+
test-build-vllm-ascend-deepseekv4:
105105
timeout-minutes: 25
106106
runs-on: ubuntu-24.04-arm
107107
steps:
@@ -116,14 +116,14 @@ jobs:
116116
uses: docker/build-push-action@v5
117117
with:
118118
context: .
119-
file: ./docker/Dockerfile.ucm-vllm-ascend.a2-v0.20.2
119+
file: ./docker/Dockerfile.ucm-vllm-ascend.a2-deepseekv4
120120
build-args: |
121121
PIP_INDEX_URL=https://pypi.org/simple
122-
tags: ucm-npu-v0.20.2:latest
122+
tags: ucm-npu-deepseekv4:latest
123123
push: false
124124
load: false
125-
cache-from: type=gha,scope=npu-v0.20.2
126-
cache-to: type=gha,mode=max,scope=npu-v0.20.2,ignore-error=true
125+
cache-from: type=gha,scope=npu-deepseekv4
126+
cache-to: type=gha,mode=max,scope=npu-deepseekv4,ignore-error=true
127127

128128
test-build-vllm-cuda-v0-20-2:
129129
timeout-minutes: 25
File renamed without changes.
Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,25 @@
11
# Set to other image if needed
22
ARG IMAGE_SOURCE="quay.io/ascend"
3-
ARG IMAGE_NAME_VERSION="vllm-ascend:v0.11.0"
3+
ARG IMAGE_NAME_VERSION="vllm-ascend:v0.18.0"
44

55
FROM ${IMAGE_SOURCE}/${IMAGE_NAME_VERSION}
66

77
ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
88
ARG INSTALL_MODE="source"
99

10-
# Apply the UCM monkey patch for vllm & vllm_ascend
11-
ENV ENABLE_UCM_PATCH=1
12-
1310
WORKDIR /workspace
1411

1512
# Install unified-cache-management
1613
COPY . /workspace/unified-cache-management
1714

1815
RUN pip config set global.index-url ${PIP_INDEX_URL}
1916

17+
RUN pip install --no-cache-dir "transformers==5.4"
18+
2019
# Build or link package
2120
RUN if [ "${INSTALL_MODE}" != "package" ]; then \
2221
pip install --no-cache-dir build cmake && \
23-
export WORKSPACE=/workspace SKIP_TAR=1 && \
22+
export WORKSPACE=/workspace SKIP_TAR=1 ENABLE_SPARSE=false && \
2423
bash /workspace/unified-cache-management/scripts/build_ascend.sh; \
2524
else \
2625
ln -s /workspace/unified-cache-management /workspace/package; \
@@ -29,9 +28,4 @@ RUN if [ "${INSTALL_MODE}" != "package" ]; then \
2928
# Install UCM
3029
RUN pip install /workspace/package/uc_manager-*.whl
3130

32-
# Install Ascend custom ops if present
33-
RUN if [ -f /workspace/package/install_ascend_ops.sh ]; then \
34-
cd /workspace/package && bash install_ascend_ops.sh; \
35-
fi
36-
3731
CMD ["/bin/bash"]
Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
11
# Set to other image if needed
2-
ARG IMAGE_SOURCE="vllm"
3-
ARG IMAGE_NAME_VERSION="vllm-openai:v0.11.0"
2+
ARG IMAGE_SOURCE="quay.io/ascend"
3+
ARG IMAGE_NAME_VERSION="vllm-ascend:v0.20.2rc1"
44

55
FROM ${IMAGE_SOURCE}/${IMAGE_NAME_VERSION}
66

77
ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
88
ARG INSTALL_MODE="source"
99

10-
# Apply the UCM monkey patch for vllm
11-
ENV ENABLE_UCM_PATCH=1
12-
1310
WORKDIR /workspace
1411

1512
# Install unified-cache-management
@@ -20,13 +17,13 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
2017
# Build or link package
2118
RUN if [ "${INSTALL_MODE}" != "package" ]; then \
2219
pip install --no-cache-dir build cmake && \
23-
export WORKSPACE=/workspace SKIP_TAR=1 && \
24-
bash /workspace/unified-cache-management/scripts/build_cuda.sh; \
20+
export WORKSPACE=/workspace SKIP_TAR=1 ENABLE_SPARSE=false && \
21+
bash /workspace/unified-cache-management/scripts/build_ascend.sh; \
2522
else \
2623
ln -s /workspace/unified-cache-management /workspace/package; \
2724
fi
2825

2926
# Install UCM
3027
RUN pip install /workspace/package/uc_manager-*.whl
3128

32-
ENTRYPOINT ["/bin/bash"]
29+
CMD ["/bin/bash"]
Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,24 @@
1-
from ucm.integration.vllm.patch.utils import patch_or_inject, when_imported
1+
import logging
2+
import os
3+
4+
from ucm.integration.vllm.patch.utils import when_imported
5+
6+
7+
def _capture_enabled() -> bool:
8+
value = os.getenv("UCM_CAPTURE_VLLM_LOG", "1").strip().lower()
9+
return value in ("1", "true", "yes", "on")
210

311

412
@when_imported("vllm.logger")
513
def patch_logger(mod):
6-
from ucm import logger
14+
if not _capture_enabled():
15+
return
16+
17+
from ucm.logger import UcmBridgeHandler, get_vllm_capture_handler
718

8-
patch_or_inject(mod, "init_logger", logger.init_logger)
9-
patch_or_inject(mod, "current_formatter_type", logger.current_formatter_type)
19+
vllm_root = logging.getLogger("vllm")
20+
if any(isinstance(h, UcmBridgeHandler) for h in vllm_root.handlers):
21+
return
22+
handler = get_vllm_capture_handler()
23+
if handler is not None:
24+
vllm_root.addHandler(handler)

0 commit comments

Comments
 (0)