diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index b2d8bf50..b446e8a2 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -2,7 +2,7 @@ # # Triggers: # - Push to main / master -> publish `:edge` (and the commit SHA) -# - Push of a tag like v0.1.4 -> publish `:0.1.4`, `:0.1`, `:latest` +# - Push of a tag like v0.1.7 -> publish `:0.1.7`, `:0.1`, `:latest` # - Manual workflow_dispatch -> custom tag via input # # No external secrets needed — uses the built-in GITHUB_TOKEN for auth. diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index 2e85e8e1..d536eb98 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -56,6 +56,37 @@ jobs: CMAKE_ARGS=-DBUILD_RDMA=ON -DBUILD_PYTHON=ON -DBUILD_NVLINK=OFF -DBUILD_TORCH_PLUGIN=OFF -DBUILD_ASCEND_DIRECT=OFF -DBUILD_TEST=OFF + # Don't try to bundle libibverbs / libnuma into the wheel: + # they're system-level libraries (provided by the rdma-core / numactl + # packages on the user's host) and are NOT in the manylinux policy + # allowlist, so auditwheel would otherwise fail with + # `auditwheel repair ... failed with code 1`. + # auditwheel must skip two classes of libraries: + # 1. System RDMA / NUMA stack — provided by the user's rdma-core + # install, NOT in the manylinux policy allowlist, must not be + # vendored or wheels would conflict with host kernel modules. + # 2. DLSlime's own sibling shared libs (e.g. lib_slime_rdma.so, + # lib_slime_obs.so) — they're already installed next to + # _slime_c.so inside the wheel and resolved at runtime via the + # $ORIGIN rpath baked in by CMake. auditwheel doesn't know how to + # look "inside the wheel" so we tell it to leave them alone. 
+ CIBW_REPAIR_WHEEL_COMMAND_LINUX: >- + auditwheel repair + --exclude libibverbs.so.1 + --exclude libnuma.so.1 + --exclude libmlx5.so.1 + --exclude libmlx4.so.1 + --exclude librdmacm.so.1 + --exclude libibumad.so.3 + --exclude libefa.so.1 + --exclude lib_slime_topology.so + --exclude lib_slime_engine.so + --exclude lib_slime_device.so + --exclude lib_slime_obs.so + --exclude lib_slime_rdma.so + --exclude lib_slime_nvlink.so + --exclude libascend_direct.so + -w {dest_dir} {wheel} with: package-dir: dlslime output-dir: dist diff --git a/dlslime-ctrl/Cargo.lock b/dlslime-ctrl/Cargo.lock index 187dbb54..ad1e4651 100644 --- a/dlslime-ctrl/Cargo.lock +++ b/dlslime-ctrl/Cargo.lock @@ -310,7 +310,7 @@ dependencies = [ [[package]] name = "dlslime-ctrl" -version = "0.1.4" +version = "0.1.7" dependencies = [ "anyhow", "axum", diff --git a/dlslime-ctrl/Cargo.toml b/dlslime-ctrl/Cargo.toml index 6a23a879..194055d6 100644 --- a/dlslime-ctrl/Cargo.toml +++ b/dlslime-ctrl/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "dlslime-ctrl" -version = "0.1.4" +version = "0.1.7" edition = "2021" description = "DLSlime control plane server" license = "MIT" diff --git a/dlslime-ctrl/pyproject.toml b/dlslime-ctrl/pyproject.toml index dba6cbc0..a67ed9cb 100644 --- a/dlslime-ctrl/pyproject.toml +++ b/dlslime-ctrl/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "dlslime-ctrl" -version = "0.1.4" +version = "0.1.7" description = "DLSlime control-plane server (Redis-backed service registry and peer-agent coordinator)" requires-python = ">=3.10" # The Python client lives in the `dlslime` package as `dlslime.ctrl.NanoCtrlClient`. 
diff --git a/dlslime/pyproject.toml b/dlslime/pyproject.toml index c17481a0..f3b4f0f1 100644 --- a/dlslime/pyproject.toml +++ b/dlslime/pyproject.toml @@ -25,7 +25,7 @@ description = "DLSlime Transfer Engine" name = "dlslime" readme = "README.md" requires-python = ">=3.8" -version = "0.1.4" +version = "0.1.7" dependencies = [ "pydantic>=2.0", "pyzmq>=25.0", diff --git a/docker/.env.example b/docker/.env.example index 35af70cb..b376de8d 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -27,5 +27,5 @@ DLSLIME_CTRL_RUST_LOG=info # By default both compose files build the image locally from ctrl.Dockerfile. # To pull a pre-built image from GitHub Container Registry (GHCR) instead: -# DLSLIME_CTRL_IMAGE=ghcr.io/deeplink-org/dlslime-ctrl:0.1.4 +# DLSLIME_CTRL_IMAGE=ghcr.io/deeplink-org/dlslime-ctrl:0.1.7 # DLSLIME_CTRL_PULL_POLICY=missing diff --git a/docker/README.md b/docker/README.md index d5ffcf54..cf32eb51 100644 --- a/docker/README.md +++ b/docker/README.md @@ -104,7 +104,7 @@ Why GHCR rather than Docker Hub: ```bash cat >> docker/.env <<'EOF' -DLSLIME_CTRL_IMAGE=ghcr.io/deeplink-org/dlslime-ctrl:0.1.4 +DLSLIME_CTRL_IMAGE=ghcr.io/deeplink-org/dlslime-ctrl:0.1.7 DLSLIME_CTRL_PULL_POLICY=missing EOF @@ -127,7 +127,7 @@ The workflow [`.github/workflows/docker-publish.yml`](../.github/workflows/docke | Trigger | Tags published | | -------------------------- | --------------------------------------- | | Push to `main` / `master` | `edge`, `sha-` | -| Push tag `v0.1.4` | `0.1.4`, `0.1`, `latest`, `sha-` | +| Push tag `v0.1.7` | `0.1.7`, `0.1`, `latest`, `sha-` | | Manual `workflow_dispatch` | optional extra tag from the input | One-time setup after the **first** successful workflow run, in the GitHub UI: @@ -141,12 +141,12 @@ Releasing a new version: ```bash # bump versions in: # Cargo.toml, dlslime-ctrl/pyproject.toml, dlslime/pyproject.toml, pyproject.toml -git commit -am "release: v0.1.4" -git tag v0.1.4 +git commit -am "release: v0.1.7" +git tag v0.1.7 git 
push origin main --tags ``` -The workflow will build `linux/amd64` + `linux/arm64` and push `0.1.4`, `0.1`, `latest`. +The workflow will build `linux/amd64` + `linux/arm64` and push `0.1.7`, `0.1`, `latest`. ### Manual push (without CI) @@ -162,7 +162,7 @@ echo "$GHCR_PAT" | docker login ghcr.io -u --password-std # 3. Build multi-arch and push. docker buildx create --use --name dlslime-builder 2>/dev/null || docker buildx use dlslime-builder -VERSION=0.1.4 +VERSION=0.1.7 docker buildx build \ --platform linux/amd64,linux/arm64 \ -f docker/ctrl.Dockerfile \ diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 9e699205..1b580c37 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -41,8 +41,8 @@ services: # listening on 127.0.0.1 on the host (e.g. http_proxy=127.0.0.1:7890). network: host # Defaults to a locally-built tag. Override to a published GHCR image - # (e.g. `ghcr.io/deeplink-org/dlslime-ctrl:0.1.4`) to skip the local build: - # echo "DLSLIME_CTRL_IMAGE=ghcr.io/deeplink-org/dlslime-ctrl:0.1.4" >> docker/.env + # (e.g. `ghcr.io/deeplink-org/dlslime-ctrl:0.1.7`) to skip the local build: + # echo "DLSLIME_CTRL_IMAGE=ghcr.io/deeplink-org/dlslime-ctrl:0.1.7" >> docker/.env # echo "DLSLIME_CTRL_PULL_POLICY=missing" >> docker/.env image: ${DLSLIME_CTRL_IMAGE:-dlslime-ctrl:local} pull_policy: ${DLSLIME_CTRL_PULL_POLICY:-build} diff --git a/docs/pyproject.toml b/docs/pyproject.toml index ab2cee55..82933a16 100644 --- a/docs/pyproject.toml +++ b/docs/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dlslime-docs" -version = "0.1.4" +version = "0.1.7" description = "Documentation site tooling for DLSlime" requires-python = ">=3.10" dependencies = [