Skip to content

Commit aedb4f4

Browse files
committed
chore(deps): upgrade runtime dependencies and CI workflow
Upgrade megatron-core, sglang, vllm (0.19.1), transformers, and related packages. Pin deepep/deepgemm commits and lock trackio version for reproducibility.

Key changes:
- Upgrade sglang, vllm, transformers, megatron-core versions
- Pin vllm to 0.19.1, fix compatibility across backends
- Optimize Dockerfile and slim Docker image
- Add uv_sync.sh install script, fix uv install on Linux
- Fix Archon Qwen3.5 precision and port range issues
- Remove integration tests from PR CI (moved to nightly)
- Sync GRPO integration test config

Refs: upgrade-deps branch
1 parent 9c4c497 commit aedb4f4

43 files changed

Lines changed: 4461 additions & 2911 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

.agents/skills/upgrade-deps/checklists/sglang.md

Lines changed: 168 additions & 21 deletions
Large diffs are not rendered by default.

.github/workflows/test-areal.yml

Lines changed: 6 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -66,21 +66,20 @@ jobs:
6666
github.event_name == 'workflow_dispatch'
6767
needs:
6868
- determine-variants
69-
name: Provision GCP runner (${{ matrix.variant }} ${{ matrix.test_type }})
69+
name: Provision GCP runner (${{ matrix.variant }})
7070
runs-on: ubuntu-latest
7171
strategy:
7272
fail-fast: false
7373
matrix:
7474
variant: ${{ fromJson(needs.determine-variants.outputs.matrix) }}
75-
test_type: [unit, integration]
7675
env:
7776
CONTAINER_IMAGE: ghcr.io/inclusionai/areal-runtime:${{ inputs.image_tag || 'dev' }}-${{ matrix.variant }}
78-
RUNNER_LABELS: gcp-a2-highgpu-2g,variant-${{ matrix.variant }},test-type-${{ matrix.test_type }}
77+
RUNNER_LABELS: gcp-a2-highgpu-2g,variant-${{ matrix.variant }}
7978
steps:
8079
- name: Set instance variables
8180
id: vars
8281
run: |
83-
echo "instance_name=gcp-runner-${{ github.run_id }}-${{ matrix.variant }}-${{ matrix.test_type }}" >> "$GITHUB_OUTPUT"
82+
echo "instance_name=gcp-runner-${{ github.run_id }}-${{ matrix.variant }}" >> "$GITHUB_OUTPUT"
8483
8584
- name: Authenticate to Google Cloud
8685
uses: google-github-actions/auth@v3
@@ -287,12 +286,11 @@ jobs:
287286
needs:
288287
- determine-variants
289288
- provision-runner
290-
name: Run AReaL tests (${{ matrix.variant }} ${{ matrix.test_type }})
289+
name: Run AReaL unit tests (${{ matrix.variant }})
291290
strategy:
292291
fail-fast: false
293292
matrix:
294293
variant: ${{ fromJson(needs.determine-variants.outputs.matrix) }}
295-
test_type: [unit, integration]
296294
environment:
297295
name: AReaL-unittests
298296
permissions:
@@ -301,7 +299,6 @@ jobs:
301299
- self-hosted
302300
- gcp-a2-highgpu-2g
303301
- "variant-${{ matrix.variant }}"
304-
- "test-type-${{ matrix.test_type }}"
305302
timeout-minutes: 120
306303
env:
307304
# Activate the venv created in the Docker image
@@ -316,7 +313,6 @@ jobs:
316313
python areal/tools/validate_docker_installation.py
317314
318315
- name: Run unit tests
319-
if: matrix.test_type == 'unit'
320316
env:
321317
CI: true
322318
HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -328,32 +324,8 @@ jobs:
328324
export PATH="/opt/.venv/bin:$PATH"
329325
pytest -m "(not slow or ci) and not ${EXCLUDE_BACKEND}" --durations=20 -s -vv tests/test_*.py tests/experimental/ tests/infra/
330326
331-
- name: Run SFT integration tests
332-
if: matrix.test_type == 'integration'
333-
env:
334-
CI: true
335-
HF_TOKEN: ${{ secrets.HF_TOKEN }}
336-
PYTHONPATH: ${{ github.workspace }}
337-
TOKENIZERS_PARALLELISM: false
338-
VIRTUAL_ENV: /opt/.venv
339-
run: |
340-
export PATH="/opt/.venv/bin:$PATH"
341-
pytest -m "not ${EXCLUDE_BACKEND}" -s -vv tests/sft/
342-
343-
- name: Run GRPO integration tests
344-
if: matrix.test_type == 'integration'
345-
env:
346-
CI: true
347-
HF_TOKEN: ${{ secrets.HF_TOKEN }}
348-
PYTHONPATH: ${{ github.workspace }}
349-
TOKENIZERS_PARALLELISM: false
350-
VIRTUAL_ENV: /opt/.venv
351-
run: |
352-
export PATH="/opt/.venv/bin:$PATH"
353-
pytest -m "not ${EXCLUDE_BACKEND}" -s -vv tests/grpo/
354-
355327
cleanup:
356-
name: Tear down GCP runner (${{ matrix.variant }} ${{ matrix.test_type }})
328+
name: Tear down GCP runner (${{ matrix.variant }})
357329
needs:
358330
- determine-variants
359331
- unit-tests
@@ -364,7 +336,6 @@ jobs:
364336
fail-fast: false
365337
matrix:
366338
variant: ${{ fromJson(needs.determine-variants.outputs.matrix) }}
367-
test_type: [unit, integration]
368339
steps:
369340
- name: Authenticate to Google Cloud
370341
uses: google-github-actions/auth@v3
@@ -376,7 +347,7 @@ jobs:
376347

377348
- name: Delete runner instance
378349
env:
379-
INSTANCE_NAME: gcp-runner-${{ github.run_id }}-${{ matrix.variant }}-${{ matrix.test_type }}
350+
INSTANCE_NAME: gcp-runner-${{ github.run_id }}-${{ matrix.variant }}
380351
run: |
381352
# Look up instance zone dynamically since matrix job outputs
382353
# cannot be consumed per-element by downstream matrix jobs.

0 commit comments

Comments
 (0)