Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 71 additions & 33 deletions .github/workflows/build-docker-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ permissions:
env:
GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
VALIDATOR_LABELS: gcp-docker-validator
RUNNER_VERSION: '2.317.0'
RUNNER_VERSION: '2.332.0'
IMAGE_NAME: ghcr.io/inclusionai/areal-runtime
IMAGE_TAG: test

Expand Down Expand Up @@ -111,12 +111,12 @@ jobs:

throw new Error(`Timed out waiting for builder runner ${instanceName} to come online.`);

build-and-push-image:
build-and-push-images:
needs:
- start-builder
name: Build and push Docker image
name: Build and push Docker images
runs-on: [self-hosted, areal-docker-builder]
timeout-minutes: 180
timeout-minutes: 240 # ~90 min/image x 2 + buffer for cache misses
steps:
- uses: actions/checkout@v6

Expand All @@ -132,35 +132,65 @@ jobs:
username: inclusionai
password: ${{ secrets.GHCR_TOKEN }}

- name: Build and push Docker image
- name: Build and push sglang image
uses: docker/build-push-action@v7
with:
context: .
file: ./Dockerfile
push: true
tags: ${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }}
tags: ${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }}-sglang
build-args: |
VARIANT=sglang

- name: Build and push vllm image
uses: docker/build-push-action@v7
with:
context: .
file: ./Dockerfile
push: true
tags: ${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }}-vllm
build-args: |
VARIANT=vllm

- name: Image details
run: |
echo "✅ Docker image built and pushed successfully!"
echo "Image: ${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }}"
echo "✅ Docker images built and pushed successfully!"
echo "Images:"
echo " - ${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }}-sglang"
echo " - ${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }}-vllm"
echo "Commit: ${{ github.sha }}"
echo "Branch: ${{ github.head_ref || github.ref_name }}"

run-tests:
name: Run tests with test image
run-tests-sglang:
name: Run tests with sglang image
needs:
- build-and-push-images
uses: ./.github/workflows/test-areal.yml
with:
image_tag: test
variant: sglang
secrets: inherit

run-tests-vllm:
name: Run tests with vllm image
needs:
- build-and-push-image
- build-and-push-images
uses: ./.github/workflows/test-areal.yml
with:
image_tag: test
variant: vllm
secrets: inherit

promote-image:
name: Promote test image to dev
promote-images:
name: Promote ${{ matrix.variant }} test image to dev
needs:
- run-tests
- run-tests-sglang
- run-tests-vllm
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
variant: [sglang, vllm]
steps:
- name: Log in to GitHub Container Registry
uses: docker/login-action@v4
Expand All @@ -173,45 +203,53 @@ jobs:
env:
IMAGE_NAME: ghcr.io/inclusionai/areal-runtime
run: |
docker pull $IMAGE_NAME:test
docker tag $IMAGE_NAME:test $IMAGE_NAME:dev
docker push $IMAGE_NAME:dev
echo "✅ Image promoted from :test to :dev"
docker pull $IMAGE_NAME:test-${{ matrix.variant }}
docker tag $IMAGE_NAME:test-${{ matrix.variant }} $IMAGE_NAME:dev-${{ matrix.variant }}
docker push $IMAGE_NAME:dev-${{ matrix.variant }}
echo "✅ ${{ matrix.variant }} image promoted from :test-${{ matrix.variant }} to :dev-${{ matrix.variant }}"

cleanup-test-image:
name: Delete test image from registry
cleanup-test-images:
name: Delete ${{ matrix.variant }} test image from registry
needs:
- build-and-push-image
- run-tests
- promote-image
if: always() && needs.build-and-push-image.result == 'success'
- build-and-push-images
- run-tests-sglang
- run-tests-vllm
- promote-images
if: always() && needs.build-and-push-images.result == 'success'
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
variant: [sglang, vllm]
steps:
- name: Delete test image from GHCR
env:
GH_TOKEN: ${{ secrets.GHCR_TOKEN }}
run: |
PACKAGE_NAME="areal-runtime"
TAG="test-${{ matrix.variant }}"
# Get the package version ID for the test tag
PACKAGE_VERSION_ID=$(curl -s -H "Authorization: Bearer $GH_TOKEN" \
"https://api.github.com/orgs/inclusionai/packages/container/areal-runtime/versions" \
| jq -r '.[] | select(.metadata.container.tags[] == "test") | .id')
"https://api.github.com/orgs/inclusionai/packages/container/$PACKAGE_NAME/versions?per_page=100" \
| jq -r ".[] | select(.metadata.container.tags[] == \"$TAG\") | .id")

if [ -n "$PACKAGE_VERSION_ID" ] && [ "$PACKAGE_VERSION_ID" != "null" ]; then
curl -X DELETE -H "Authorization: Bearer $GH_TOKEN" \
"https://api.github.com/orgs/inclusionai/packages/container/areal-runtime/versions/$PACKAGE_VERSION_ID"
echo "✅ Deleted test image from registry"
"https://api.github.com/orgs/inclusionai/packages/container/$PACKAGE_NAME/versions/$PACKAGE_VERSION_ID"
echo "✅ Deleted ${{ matrix.variant }} test image from registry"
else
echo "⚠️ Test image not found or already deleted"
echo "⚠️ ${{ matrix.variant }} test image not found or already deleted"
fi

stop-builder:
name: Stop areal-docker-builder instance
needs:
- start-builder
- build-and-push-image
- run-tests
- promote-image
- cleanup-test-image
- build-and-push-images
- run-tests-sglang
- run-tests-vllm
- promote-images
- cleanup-test-images
if: always() && needs.start-builder.outputs.was_running != 'true'
runs-on: ubuntu-latest
env:
Expand Down
24 changes: 18 additions & 6 deletions .github/workflows/install-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,13 @@ jobs:
# Test CUDA extras installation on Linux only (CUDA packages have Linux-only wheels)
# Note: flash-attn is excluded because it requires CUDA compilation infrastructure
# which is not available on standard GitHub runners
# sglang and vllm are conflicting extras, so we test each separately
runs-on: ubuntu-latest
name: Install test (Linux, CUDA extras)
strategy:
fail-fast: false
matrix:
variant: [sglang, vllm]
name: Install test (Linux, CUDA extras, ${{ matrix.variant }})

steps:
- name: Checkout repository
Expand All @@ -96,17 +101,20 @@ jobs:
- name: Install package with CUDA extras (excluding flash-attn)
# flash-attn requires CUDA toolkit for compilation, skip it in CI
# Test individual extras that have pre-built wheels
run: uv sync --extra vllm --extra sglang --extra megatron --extra tms
run: uv sync --extra ${{ matrix.variant }} --extra megatron --extra tms

- name: Verify package import with CUDA extras
run: |
uv run python -c "import areal; print(f'areal version: {areal.__version__}')"

- name: Verify CUDA-dependent packages are installed
env:
VARIANT: ${{ matrix.variant }}
run: |
uv run python -c "
import importlib.util
packages = ['vllm', 'sglang', 'megatron.core']
import os, importlib.util
variant = os.environ['VARIANT']
packages = [variant, 'megatron.core']
for pkg in packages:
spec = importlib.util.find_spec(pkg)
status = 'installed' if spec else 'not found'
Expand All @@ -120,9 +128,13 @@ jobs:
# and `no space on device left` errors on busy runners
if: github.event_name == 'workflow_dispatch'
runs-on: ubuntu-latest
name: Install test (Docker runtime image)
strategy:
fail-fast: false
matrix:
variant: [sglang, vllm]
name: Install test (Docker ${{ matrix.variant }} image)
container:
image: ghcr.io/inclusionai/areal-runtime:v1.0.1
image: ghcr.io/inclusionai/areal-runtime:dev-${{ matrix.variant }}

steps:
- name: Checkout repository
Expand Down
41 changes: 31 additions & 10 deletions .github/workflows/tag-release-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ jobs:
start-builder:
name: Start areal-docker-builder instance
runs-on: ubuntu-latest
outputs:
was_running: ${{ steps.start-instance.outputs.was_running }}
env:
INSTANCE_NAME: areal-docker-builder
INSTANCE_ZONE: us-central1-f
Expand All @@ -39,6 +41,7 @@ jobs:
uses: google-github-actions/setup-gcloud@v3

- name: Start builder instance if stopped
id: start-instance
run: |
set -euo pipefail

Expand All @@ -55,12 +58,14 @@ jobs:

if [ "$status" = "RUNNING" ]; then
echo "Instance $INSTANCE_NAME is already running."
echo "was_running=true" >> $GITHUB_OUTPUT
elif [ "$status" = "TERMINATED" ] || [ "$status" = "SUSPENDED" ]; then
echo "Instance $INSTANCE_NAME is $status. Starting it..."
gcloud compute instances start "$INSTANCE_NAME" \
--project "$GCP_PROJECT_ID" \
--zone "$INSTANCE_ZONE"
echo "Instance started successfully."
echo "was_running=false" >> $GITHUB_OUTPUT
else
echo "Instance $INSTANCE_NAME has unexpected status: $status" >&2
exit 1
Expand Down Expand Up @@ -113,9 +118,9 @@ jobs:
build-and-push-release:
needs:
- start-builder
name: Build and push release Docker image
name: Build and push release Docker images
runs-on: [self-hosted, areal-docker-builder]
timeout-minutes: 180
timeout-minutes: 300
outputs:
package_version: ${{ steps.get-version.outputs.version }}
release_tag: ${{ steps.get-version.outputs.release_tag }}
Expand Down Expand Up @@ -150,25 +155,41 @@ jobs:
username: inclusionai
password: ${{ secrets.GHCR_TOKEN }}

- name: Build and push Docker image
- name: Build and push sglang image
uses: docker/build-push-action@v7
with:
context: .
file: ./Dockerfile
push: true
tags: |
${{ env.IMAGE_NAME }}:${{ steps.get-version.outputs.release_tag }}
${{ env.IMAGE_NAME }}:${{ steps.get-version.outputs.release_tag }}-sglang
${{ env.IMAGE_NAME }}:latest
${{ env.IMAGE_NAME }}:dev
${{ env.IMAGE_NAME }}:dev-sglang
build-args: |
VARIANT=sglang

- name: Build and push vllm image
uses: docker/build-push-action@v7
with:
context: .
file: ./Dockerfile
push: true
tags: |
${{ env.IMAGE_NAME }}:${{ steps.get-version.outputs.release_tag }}-vllm
${{ env.IMAGE_NAME }}:dev-vllm
build-args: |
VARIANT=vllm

- name: Image details
run: |
echo "Docker image built and pushed successfully!"
echo "Docker images built and pushed successfully!"
echo "Image: ${{ env.IMAGE_NAME }}"
echo "Tags:"
echo " - ${{ steps.get-version.outputs.release_tag }}"
echo " - latest"
echo " - dev"
echo " - ${{ steps.get-version.outputs.release_tag }}-sglang"
echo " - ${{ steps.get-version.outputs.release_tag }}-vllm"
echo " - latest (sglang)"
echo " - dev-sglang"
echo " - dev-vllm"
echo "Release: ${{ github.event.release.name || github.event.inputs.tag }}"
echo "Commit: ${{ github.sha }}"

Expand All @@ -177,7 +198,7 @@ jobs:
needs:
- build-and-push-release
- start-builder
if: always()
if: always() && needs.start-builder.outputs.was_running != 'true'
runs-on: ubuntu-latest
env:
INSTANCE_NAME: areal-docker-builder
Expand Down
Loading