Skip to content

Commit 10f35f3

Browse files
authored
Merge branch 'main' into feat/dflash-vlm-sglang
2 parents c0bb555 + a8ae45f commit 10f35f3

7 files changed

Lines changed: 70 additions & 49 deletions

File tree

.github/workflows/publish_pypi.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ jobs:
2121
with:
2222
python-version: '3.11'
2323

24-
- run: python setup.py sdist build
24+
- run: pip install build && python -m build --sdist
2525

2626
# publish to PyPI if executed on the main branch
2727
- name: Publish package to PyPI
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env bash
# delete_gpu_process.sh — free idle GPUs on a shared CI runner.
#
# For every GPU reporting 0% utilization, kill any stale compute processes
# left behind by a previous job. GPUs with nonzero utilization are skipped.
#
# Requirements:
#   - nvidia-smi on PATH
#   - docker, with permission to run --privileged --pid=host (this script is
#     expected to run inside a CI container whose PID namespace does NOT
#     contain the stale host processes, so a plain `kill` cannot reach them).
set -uo pipefail

echo "=== Checking GPU Utilization ==="

# One line per GPU: "<index>, <utilization>"
nvidia-smi --query-gpu=index,utilization.gpu --format=csv,noheader,nounits |
while IFS=',' read -r gpu_index utilization; do
  # Strip the padding spaces nvidia-smi puts around CSV fields.
  gpu_index=${gpu_index// /}
  utilization=${utilization// /}

  # Guard: utilization can be non-numeric (e.g. "[N/A]") on some driver
  # states; the original `[ ... -eq 0 ]` would error out there. Skip instead.
  if ! [[ "$utilization" =~ ^[0-9]+$ ]]; then
    echo "GPU $gpu_index reported non-numeric utilization '$utilization' — skipping."
    continue
  fi

  if (( utilization == 0 )); then
    echo "GPU $gpu_index has 0% utilization — checking for processes..."

    # PIDs of compute apps bound to this GPU, one per line.
    pids=$(nvidia-smi --query-compute-apps=pid --format=csv,noheader --id="$gpu_index" | tr -d ' ')

    if [ -z "$pids" ]; then
      echo "  No processes found on GPU $gpu_index."
    else
      echo "  Killing processes on GPU $gpu_index: $pids"
      # Intentionally unquoted: split $pids on newlines, one PID per word.
      for pid in $pids; do
        echo "  Killing PID $pid..."
        # SIGKILL from a privileged container sharing the host PID namespace —
        # the only way to reach processes outside our own namespace. Tolerate
        # failure (PID may have already exited) so the CI step does not abort.
        docker run --rm --privileged --pid=host ubuntu bash -c "kill -9 $pid" \
          && echo "  PID $pid killed." \
          || echo "  Failed to kill PID $pid (may have already exited)."
      done
    fi
  else
    echo "GPU $gpu_index is active ($utilization% utilization) — skipping."
  fi
done

echo ""

.github/workflows/test.yaml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ jobs:
1919
runs-on: [self-hosted]
2020
container:
2121
image: lmsysorg/sglang:v0.5.5 # we lock to this version to avoid repeated docker pull
22-
options: --gpus all --shm-size=2g --rm -v /dev/shm
22+
options: --gpus all --shm-size=2g --rm -v /dev/shm --privileged
2323
steps:
2424
- name: Checkout code
2525
uses: actions/checkout@v4
@@ -49,6 +49,11 @@ jobs:
4949
uv pip install setuptools
5050
MAX_JOBS=8 uv pip install -v ".[fa]" --prerelease=allow --no-build-isolation
5151
52+
- name: Kill GPU processes
53+
shell: bash
54+
run: |
55+
bash .github/workflows/scripts/delete_gpu_process.sh
56+
5257
- name: Run test
5358
timeout-minutes: 30
5459
shell: bash
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
{
2+
"architectures": [
3+
"LlamaForCausalLMEagle3"
4+
],
5+
"attention_bias": false,
6+
"attention_dropout": 0.0,
7+
"bos_token_id": 151643,
8+
"eos_token_id": 151645,
9+
"head_dim": 128,
10+
"hidden_act": "silu",
11+
"hidden_size": 2048,
12+
"initializer_range": 0.02,
13+
"intermediate_size": 8192,
14+
"max_position_embeddings": 4096,
15+
"model_type": "llama",
16+
"num_attention_heads": 16,
17+
"num_hidden_layers": 1,
18+
"num_key_value_heads": 4,
19+
"rms_norm_eps": 1e-06,
20+
"rope_scaling": null,
21+
"rope_theta": 1000000.0,
22+
"tie_word_embeddings": false,
23+
"torch_dtype": "bfloat16",
24+
"use_cache": true,
25+
"vocab_size": 151936,
26+
"draft_vocab_size": 32000
27+
}

pyproject.toml

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,12 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "specforge"
7-
dynamic = ["version", "description"]
7+
dynamic = ["version"]
88
readme = "README.md"
99
requires-python = ">=3.11"
10+
description = "SpecForge: Speculative Decoding Training Framework"
11+
authors = [{name = "SGLang Team"}]
12+
urls = {Homepage = "https://github.com/sgl-project/SpecForge"}
1013
dependencies = [
1114
"pre-commit",
1215
"torch==2.9.1",
@@ -29,8 +32,8 @@ dependencies = [
2932
"yunchang",
3033
]
3134

32-
[tool.setuptools]
33-
packages = ["specforge"]
35+
[tool.setuptools.packages.find]
36+
exclude = ["configs*", "scripts*", "tests*"]
3437

3538
[project.optional-dependencies]
3639
dev = [
@@ -41,4 +44,3 @@ fa = ["flash-attn"]
4144

4245
[tool.setuptools.dynamic]
4346
version = {file = "version.txt"}
44-
description = {file = "README.md"}

setup.py

Lines changed: 0 additions & 33 deletions
This file was deleted.

tests/test_modeling/test_target/test_sglang_backend/test_sglang_backend.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ def test_dense(rank, world_size, port, tp_size):
3434
mem_fraction_static=0.4,
3535
# enable_torch_compile=True,
3636
enable_nccl_nvls=True,
37-
# enable_symm_mem=True,
3837
enable_symm_mem=False,
3938
enable_torch_compile=True,
4039
enable_dp_attention=False,
@@ -73,9 +72,6 @@ def test_moe(rank, world_size, port, tp_size):
7372
mem_fraction_static=0.4,
7473
enable_torch_compile=True,
7574
enable_nccl_nvls=True,
76-
# enable_symm_mem=True,
77-
# enable_dp_attention=True,
78-
# enable_dp_lm_head=True,
7975
enable_symm_mem=False,
8076
enable_dp_attention=False,
8177
enable_dp_lm_head=False,
@@ -213,11 +209,8 @@ def test_vlm(rank, world_size, port, tp_size):
213209
attention_backend="fa3",
214210
mem_fraction_static=0.75,
215211
enable_torch_compile=True,
216-
# enable_nccl_nvls=False,
217212
enable_nccl_nvls=True,
218213
enable_symm_mem=False, # Disable to avoid nccl_allocator compilation issues
219-
# enable_dp_attention=True,
220-
# enable_dp_lm_head=True,
221214
enable_dp_attention=False,
222215
enable_dp_lm_head=False,
223216
enable_piecewise_cuda_graph=True,
@@ -376,10 +369,7 @@ def test_vlm_multi_batch(rank, world_size, port, tp_size):
376369
mem_fraction_static=0.4,
377370
enable_nccl_nvls=True,
378371
enable_torch_compile=True,
379-
# enable_nccl_nvls=False,
380372
enable_symm_mem=False,
381-
# enable_dp_attention=True,
382-
# enable_dp_lm_head=True,
383373
enable_dp_attention=False,
384374
enable_dp_lm_head=False,
385375
enable_piecewise_cuda_graph=True,

0 commit comments

Comments
 (0)