Skip to content

Commit 8e21f75

Browse files
author
SHshenhao
committed
merge_main
2 parents c5e36a0 + eb0d2e2 commit 8e21f75

222 files changed

Lines changed: 1613 additions & 733 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.dockerignore

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Keep the Docker build context small. ctrl.Dockerfile only needs `dlslime-ctrl/`.
2+
3+
# Version control
4+
.git
5+
.gitignore
6+
.gitattributes
7+
.gitmodules
8+
9+
# Rust build artifacts
10+
**/target/
11+
12+
# Python build / cache artifacts
13+
**/build/
14+
**/__pycache__/
15+
**/*.egg-info/
16+
**/.pytest_cache/
17+
**/.mypy_cache/
18+
**/.ruff_cache/
19+
**/*.pyc
20+
**/*.pyo
21+
22+
# C++/CMake build artifacts
23+
**/CMakeCache.txt
24+
**/CMakeFiles/
25+
**/cmake_install.cmake
26+
**/*.o
27+
**/*.so
28+
**/*.a
29+
30+
# IDE / editor
31+
.vscode/
32+
.idea/
33+
*.swp
34+
*.swo
35+
36+
# OS junk
37+
.DS_Store
38+
Thumbs.db
39+
40+
# Top-level dirs not needed by ctrl.Dockerfile
41+
docs/
42+
docker/
43+
dlslime/
44+
cmake/
45+
scripts/
46+
47+
# Markdown / metadata at the repo root
48+
README.md
49+
README_zh.md
50+
LICENSE
51+
MANIFEST.in
52+
pyproject.toml
53+
CMakeLists.txt

.github/workflows/ci.yml

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,13 @@ jobs:
7474
-DBUILD_ASCEND_DIRECT=OFF
7575
-DBUILD_TEST=OFF
7676
-DUSE_CUDA=ON
77-
run: python -m build --wheel
77+
# The native wheel lives under dlslime/ (dlslime-ctrl/ is a separate Rust crate).
78+
run: python -m build --wheel --outdir dist dlslime
79+
7880

7981
- name: Install wheel smoke test
8082
run: |
81-
python -m pip install dist/*.whl --no-deps
83+
python -m pip install dist/dlslime-*.whl --no-deps
8284
cd /tmp
8385
python - <<'PY'
8486
import dlslime
@@ -111,6 +113,9 @@ jobs:
111113
}
112114
trap cleanup EXIT
113115
116+
# Mount the runner-checked-out workspace into the container so the
117+
# tests always run against the exact commit being CI'd, not whatever
118+
# stale tree happens to live on the host.
114119
docker run -d \
115120
--gpus all \
116121
--network host \
@@ -119,15 +124,15 @@ jobs:
119124
--cap-add SYS_ADMIN \
120125
--cap-add SYS_PTRACE \
121126
--name "${container_name}" \
122-
-v /mnt/nvme1n1/ml_research/majinming:/mnt/nvme1n1/ml_research/majinming \
127+
-v "${GITHUB_WORKSPACE}:/workspace" \
123128
-v /mnt/nvme1n1/ml_research/models:/models \
124-
-w /mnt/nvme1n1/ml_research/majinming/src/DLSlime \
129+
-w /workspace \
125130
majinming_lmdeploy:v1 \
126131
sleep infinity
127132
128133
docker exec "${container_name}" bash -lc '
129134
set -euxo pipefail
130-
cd /mnt/nvme1n1/ml_research/majinming/src/DLSlime
135+
cd /workspace
131136
export PIP_CONFIG_FILE=/dev/null
132137
export PIP_INDEX_URL=https://mirrors.aliyun.com/pypi/simple/
133138
export PIP_TRUSTED_HOST=mirrors.aliyun.com
@@ -140,8 +145,8 @@ jobs:
140145
python -m pip install -U pip pytest
141146
python -m pip show dlslime || true
142147
python -m pip uninstall -y dlslime || true
143-
python -m pip install -e .
148+
python -m pip install -e dlslime
144149
python -m pip show dlslime
145150
python -c "import dlslime; print(\"dlslime:\", dlslime.__file__); print(\"available_nic:\", dlslime.available_nic())"
146-
pytest tests/python -v
151+
pytest dlslime/tests/python -v
147152
'
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
# Build & publish the `dlslime-ctrl` image to GitHub Container Registry (GHCR).
2+
#
3+
# Triggers:
4+
# - Push to main / master    -> publish `:sha-<short>` (plus `:edge` on the default branch)
5+
# - Push of a tag like v0.1.7 -> publish `:0.1.7`, `:0.1`, `:latest`
6+
# - Manual workflow_dispatch -> custom tag via input
7+
#
8+
# No external secrets needed — uses the built-in GITHUB_TOKEN for auth.
9+
# After the first successful run, go to:
10+
# https://github.com/orgs/DeepLink-org/packages/container/dlslime-ctrl/settings
11+
# and set the package visibility to "Public" so anonymous `docker pull` works.
12+
13+
name: docker-publish
14+
15+
on:
16+
push:
17+
branches: [main, master]
18+
tags: ["v*"]
19+
paths:
20+
- "dlslime-ctrl/**"
21+
- "docker/**"
22+
- ".github/workflows/docker-publish.yml"
23+
workflow_dispatch:
24+
inputs:
25+
tag:
26+
description: "Extra tag to publish (e.g. dev, rc1)"
27+
required: false
28+
default: ""
29+
30+
permissions:
31+
contents: read
32+
packages: write
33+
34+
env:
35+
REGISTRY: ghcr.io
36+
# GHCR requires lowercase. ${{ github.repository_owner }} = "DeepLink-org",
37+
# so IMAGE_NAME below is hard-coded in lowercase rather than derived from it.
38+
IMAGE_NAME: deeplink-org/dlslime-ctrl
39+
40+
jobs:
41+
publish:
42+
runs-on: ubuntu-24.04
43+
steps:
44+
- uses: actions/checkout@v6
45+
46+
- name: Set up QEMU (for multi-arch)
47+
uses: docker/setup-qemu-action@v3
48+
49+
- name: Set up Docker Buildx
50+
uses: docker/setup-buildx-action@v3
51+
52+
- name: Log in to GHCR
53+
uses: docker/login-action@v3
54+
with:
55+
registry: ${{ env.REGISTRY }}
56+
username: ${{ github.actor }}
57+
password: ${{ secrets.GITHUB_TOKEN }}
58+
59+
# Generate tags + OCI labels automatically based on the git ref.
60+
# v1.2.3 -> 1.2.3, 1.2, latest
61+
# main -> edge
62+
# any push -> sha-<short>
63+
- name: Compute image metadata
64+
id: meta
65+
uses: docker/metadata-action@v5
66+
with:
67+
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
68+
tags: |
69+
type=semver,pattern={{version}}
70+
type=semver,pattern={{major}}.{{minor}}
71+
type=raw,value=edge,enable={{is_default_branch}}
72+
type=sha,prefix=sha-,format=short
73+
type=raw,value=${{ github.event.inputs.tag }},enable=${{ github.event_name == 'workflow_dispatch' && github.event.inputs.tag != '' }}
74+
labels: |
75+
org.opencontainers.image.title=dlslime-ctrl
76+
org.opencontainers.image.description=DLSlime control plane server
77+
org.opencontainers.image.source=https://github.com/${{ github.repository }}
78+
org.opencontainers.image.licenses=MIT
79+
80+
- name: Build & push (linux/amd64, linux/arm64)
81+
uses: docker/build-push-action@v6
82+
with:
83+
context: .
84+
file: docker/ctrl.Dockerfile
85+
platforms: linux/amd64,linux/arm64
86+
push: true
87+
tags: ${{ steps.meta.outputs.tags }}
88+
labels: ${{ steps.meta.outputs.labels }}
89+
cache-from: type=gha
90+
cache-to: type=gha,mode=max
91+
92+
- name: Print published tags
93+
run: |
94+
echo "Published tags:"
95+
echo "${{ steps.meta.outputs.tags }}"

.github/workflows/pypi-publish.yml

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
# Build & publish `dlslime` and `dlslime-ctrl` wheels to PyPI on tag push.
2+
#
3+
# Auth: PyPI Trusted Publishing via OIDC — no secrets needed.
4+
# See RELEASING.md for the one-time PyPI configuration.
5+
#
6+
# Triggered by `vX.Y.Z` tags (e.g. as produced by scripts/release.sh).
7+
8+
name: pypi-publish
9+
10+
on:
11+
push:
12+
tags: ["v*"]
13+
workflow_dispatch:
14+
inputs:
15+
dry_run:
16+
description: "Build wheels but skip the upload step"
17+
type: boolean
18+
default: false
19+
20+
permissions:
21+
id-token: write # required for OIDC token exchange with PyPI
22+
contents: read
23+
24+
jobs:
25+
# ---------------- dlslime (Python + C++ via scikit-build-core) -----------------
26+
build-dlslime-sdist:
27+
runs-on: ubuntu-24.04
28+
steps:
29+
- uses: actions/checkout@v6
30+
- uses: actions/setup-python@v6
31+
with:
32+
python-version: "3.12"
33+
- run: python -m pip install build
34+
- run: python -m build --sdist --outdir dist dlslime
35+
- uses: actions/upload-artifact@v4
36+
with:
37+
name: dist-dlslime-sdist
38+
path: dist/*.tar.gz
39+
40+
build-dlslime-wheels:
41+
runs-on: ubuntu-24.04
42+
strategy:
43+
fail-fast: false
44+
matrix:
45+
python: ["cp310", "cp311", "cp312", "cp313"]
46+
steps:
47+
- uses: actions/checkout@v6
48+
- uses: pypa/cibuildwheel@v3.4
49+
env:
50+
# Build only the matching Python ABI on x86_64 manylinux.
51+
CIBW_BUILD: "${{ matrix.python }}-manylinux_x86_64"
52+
CIBW_BEFORE_ALL_LINUX: |
53+
yum install -y libibverbs-devel numactl-devel || \
54+
(apt-get update && apt-get install -y libibverbs-dev libnuma-dev)
55+
CIBW_ENVIRONMENT: >-
56+
CMAKE_ARGS=-DBUILD_RDMA=ON -DBUILD_PYTHON=ON
57+
-DBUILD_NVLINK=OFF -DBUILD_TORCH_PLUGIN=OFF
58+
-DBUILD_ASCEND_DIRECT=OFF -DBUILD_TEST=OFF
59+
# Don't try to bundle libibverbs / libnuma into the wheel:
60+
# they're system-level libraries (provided by the rdma-core / numactl
61+
# packages on the user's host) and are NOT in the manylinux policy
62+
# allowlist, so auditwheel would otherwise fail with
63+
# `auditwheel repair ... failed with code 1`.
64+
# auditwheel must skip two classes of libraries:
65+
# 1. System RDMA / NUMA stack — provided by the user's rdma-core
66+
# install, NOT in the manylinux policy allowlist, must not be
67+
# vendored or wheels would conflict with host kernel modules.
68+
# 2. DLSlime's own sibling shared libs (e.g. lib_slime_rdma.so,
69+
# lib_slime_obs.so) — they're already installed next to
70+
# _slime_c.so inside the wheel and resolved at runtime via the
71+
# $ORIGIN rpath baked in by CMake. auditwheel doesn't know how to
72+
# look "inside the wheel" so we tell it to leave them alone.
73+
CIBW_REPAIR_WHEEL_COMMAND_LINUX: >-
74+
auditwheel repair
75+
--exclude libibverbs.so.1
76+
--exclude libnuma.so.1
77+
--exclude libmlx5.so.1
78+
--exclude libmlx4.so.1
79+
--exclude librdmacm.so.1
80+
--exclude libibumad.so.3
81+
--exclude libefa.so.1
82+
--exclude lib_slime_topology.so
83+
--exclude lib_slime_engine.so
84+
--exclude lib_slime_device.so
85+
--exclude lib_slime_obs.so
86+
--exclude lib_slime_rdma.so
87+
--exclude lib_slime_nvlink.so
88+
--exclude libascend_direct.so
89+
-w {dest_dir} {wheel}
90+
with:
91+
package-dir: dlslime
92+
output-dir: dist
93+
- uses: actions/upload-artifact@v4
94+
with:
95+
name: dist-dlslime-${{ matrix.python }}
96+
path: dist/*.whl
97+
98+
# ---------------- dlslime-ctrl (Rust bin wheel via maturin) --------------------
99+
build-dlslime-ctrl:
100+
runs-on: ubuntu-24.04
101+
steps:
102+
- uses: actions/checkout@v6
103+
- uses: actions/setup-python@v6
104+
with:
105+
python-version: "3.12"
106+
- uses: dtolnay/rust-toolchain@stable
107+
- run: python -m pip install "maturin>=1.0,<2.0"
108+
- run: maturin build --release --out dist --manifest-path dlslime-ctrl/Cargo.toml
109+
- uses: actions/upload-artifact@v4
110+
with:
111+
name: dist-dlslime-ctrl
112+
path: dist/*.whl
113+
114+
# ---------------- Publish to PyPI (Trusted Publishing) -------------------------
115+
publish-dlslime:
116+
needs: [build-dlslime-sdist, build-dlslime-wheels]
117+
if: ${{ github.event_name == 'push' || !inputs.dry_run }}
118+
runs-on: ubuntu-24.04
119+
environment: pypi # add manual-approval gate via repo Settings → Environments
120+
steps:
121+
- uses: actions/download-artifact@v4
122+
with:
123+
pattern: dist-dlslime-*
124+
path: dist
125+
merge-multiple: true
126+
- uses: pypa/gh-action-pypi-publish@release/v1
127+
with:
128+
packages-dir: dist
129+
# No `password:` — OIDC handles auth.
130+
131+
publish-dlslime-ctrl:
132+
needs: [build-dlslime-ctrl]
133+
if: ${{ github.event_name == 'push' || !inputs.dry_run }}
134+
runs-on: ubuntu-24.04
135+
environment: pypi
136+
steps:
137+
- uses: actions/download-artifact@v4
138+
with:
139+
name: dist-dlslime-ctrl
140+
path: dist
141+
- uses: pypa/gh-action-pypi-publish@release/v1
142+
with:
143+
packages-dir: dist

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,4 +221,4 @@ scripts/.bin/
221221

222222
.codex
223223

224-
bench/results/**
224+
dlslime/bench/results/**

0 commit comments

Comments
 (0)