# GSQ/pyproject.toml (IST-DASLab/GSQ, branch main)

# PEP 517/518 build configuration: the package is built with the setuptools
# backend. "wheel" is deliberately not listed in `requires`: the setuptools
# backend asks for it on its own when a wheel build needs it, and listing it
# here would also force it onto source-distribution-only builds.
[build-system]
requires = ["setuptools>=68"]
build-backend = "setuptools.build_meta"

# PEP 621 project metadata for the distribution named "gsq".
[project]
name = "gsq"
version = "0.1.0"
description = "Gumbel Softmax Quantization for large language models"
readme = "README.md"
requires-python = ">=3.10"

# Runtime dependencies. Every entry is left unpinned except the
# torch / torchvision / vllm trio near the end, which is pinned to exact
# versions (see the comment directly above those entries for the reason).
dependencies = [
    "numpy",
    "transformers",
    "accelerate",
    "datasets",
    "safetensors",
    "compressed-tensors",
    "lion-pytorch",
    "wandb",
    "tqdm",
    "pyyaml",
    "python-dotenv",
    "tiktoken",
    # "[api]" requests lm-eval's optional `api` extra in addition to the base package.
    "lm-eval[api]",
    "lighteval",
    # vLLM 0.20.2 metadata pins torch==2.11.0 and torchvision==0.26.0 exactly;
    # mirror those constraints here so uv doesn't backtrack vllm to 0.1.2.
    "torch==2.11.0",
    "torchvision==0.26.0",
    "vllm==0.20.2",
    "ray",
    "ninja",
    "humming-kernels",
    "matplotlib",
]

# Automatic package discovery for setuptools: search the repository root
# (`where`) and keep only packages whose dotted name matches `src*`.
# NOTE(review): with this pattern the importable top-level package is
# presumably `src` (and its subpackages), not `gsq` — confirm this is intended.
[tool.setuptools.packages.find]
where = ["."]
include = ["src*"]

# Pin torch (and friends) to the PyTorch CUDA wheel index.
# Default is cu130 to match vLLM 0.20+, which ships a CUDA-13 ABI (its
# _C.abi3.so requires libcudart.so.13). To target a different CUDA version,
# override the index URL at the command line using uv's name=url syntax, e.g.:
#   UV_INDEX=pytorch=https://download.pytorch.org/whl/cu128 uv sync
# or pass --index-strategy unsafe-best-match with --index pytorch=<url>.
# When changing this, also bump TORCH_CUDA in scripts/setup_env.sh and re-pin
# vLLM accordingly: cu128 needs vllm<0.20; cu130 works with vllm>=0.20.
[[tool.uv.index]]
# Name that the `index = "pytorch"` entries in [tool.uv.sources] refer to.
name = "pytorch"
# cu130 (CUDA 13) variant of the PyTorch wheel index.
url = "https://download.pytorch.org/whl/cu130"
# Explicit index: uv only resolves packages from it when they are pinned to
# it by name in [tool.uv.sources]; all other packages use the default index.
explicit = true

# Route the PyTorch family of packages to the index named "pytorch"
# (declared in [[tool.uv.index]]) instead of the default package index.
[tool.uv.sources]
torch = { index = "pytorch" }
torchvision = { index = "pytorch" }
# torchaudio is not listed in [project].dependencies in this file.
# NOTE(review): presumably kept so it resolves from the same index if it is
# ever added as a dependency — confirm.
torchaudio = { index = "pytorch" }