Skip to content

Commit 3bbee0a

Browse files
authored
update readme and code format (#243)
1 parent 1b717a4 commit 3bbee0a

113 files changed

Lines changed: 915 additions & 2448 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ repos:
44
hooks:
55
- id: black
66
name: Black
7-
args: [--line-length=88]
7+
args: [--line-length=99]
88

99
- repo: https://github.com/pycqa/isort
1010
rev: 5.13.2
@@ -20,6 +20,6 @@ repos:
2020
name: Flake8
2121
args: [
2222
"--ignore=E203,W503,W504",
23-
"--max-line-length=88"
23+
"--max-line-length=99"
2424
]
2525
additional_dependencies: [flake8-bugbear]

README.md

Lines changed: 1 addition & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -221,61 +221,9 @@ Alternatively, you can clone the repository and install from source in editable
221221
cd AngelSlim && python setup.py install
222222
```
223223

224-
For more detailed installation instructions, please refer to the [Installation Documentation](https://angelslim.readthedocs.io/zh-cn/latest/getting_started/installation.html).
224+
For more detailed installation instructions and platform-specific guidance, please refer to the [Installation Documentation](https://angelslim.readthedocs.io/zh-cn/latest/getting_started/installation.html).
225225

226-
#### Windows Installation (with FP8 Triton Support)
227226

228-
AngelSlim supports Windows with FP8 Triton kernels. Follow these steps to build from source:
229-
230-
```batch
231-
:: Clone the repository
232-
git clone https://github.com/Tencent/AngelSlim.git
233-
cd AngelSlim
234-
235-
:: Create and activate virtual environment (Python 3.10 recommended)
236-
uv venv --python 3.10
237-
.venv\Scripts\activate
238-
239-
:: Install base dependencies
240-
uv pip install packaging wheel setuptools ninja numpy==1.26.4 pip build psutil
241-
242-
:: Install PyTorch with CUDA 12.8 support
243-
uv pip install torch==2.10.0 --index-url https://download.pytorch.org/whl/cu128
244-
245-
:: Install Triton for Windows
246-
uv pip install -U triton-windows
247-
248-
:: Configure Visual Studio build environment
249-
set INCLUDE=
250-
set LIB=
251-
set LIBPATH=
252-
call "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvarsall.bat" x64
253-
254-
:: Configure CUDA environment
255-
set CUDA_HOME=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8
256-
set PATH=%CUDA_HOME%\bin;%PATH%
257-
set DISTUTILS_USE_SDK=1
258-
259-
:: Set target CUDA architectures (adjust based on your GPU)
260-
set TORCH_CUDA_ARCH_LIST=8.0;8.6;8.9;9.0
261-
262-
:: Build the wheel
263-
set DG_USE_LOCAL_VERSION=0
264-
python setup.py bdist_wheel
265-
266-
:: Verify FP8 Triton kernels are working
267-
python -c "import torch; from angelslim.compressor.diffusion.kernels.python.quantizers import fp8_per_block_quant_triton; from angelslim.compressor.diffusion.kernels.python.gemm import fp8_gemm_triton_block; a,b=torch.randn(128,256,device='cuda'),torch.randn(512,256,device='cuda'); aq,a_s=fp8_per_block_quant_triton(a); bq,b_s=fp8_per_block_quant_triton(b); c=fp8_gemm_triton_block(aq,a_s,bq,b_s); print(f'FP8 GEMM OK: {c.shape}, {c.dtype}')"
268-
```
269-
270-
**Requirements:**
271-
- Windows 10/11 with NVIDIA GPU (Ampere or newer recommended)
272-
- Visual Studio 2022 with C++ build tools
273-
- CUDA Toolkit 12.8
274-
- Python 3.10
275-
276-
**Environment Variables:**
277-
- `ANGELSLIM_BACKEND`: Force backend selection (`triton` or `pytorch`)
278-
- `ANGELSLIM_TORCH_COMPILE`: Enable/disable torch.compile (`0` or `1`)
279227

280228
### 2. Quick Start
281229

README_cn.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ pip install angelslim
223223
cd AngelSlim && python setup.py install
224224
```
225225

226-
更详细的安装说明可参考[安装文档](https://angelslim.readthedocs.io/zh-cn/latest/getting_started/installation.html)
226+
更详细的安装说明以及不同平台的安装指引，可参考[安装文档](https://angelslim.readthedocs.io/zh-cn/latest/getting_started/installation.html)。
227227

228228
### 2、快速开始
229229

angelslim/compressor/_platform.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
import sys
2929
from enum import Enum
3030
from functools import lru_cache
31-
from typing import Optional
3231

3332
import torch
3433

@@ -90,7 +89,7 @@ def is_triton_available() -> bool:
9089

9190
# Try to import triton
9291
try:
93-
import triton
92+
import triton # noqa: F811 F401
9493

9594
# Test if JIT compilation works
9695
return _test_triton_jit()
@@ -200,7 +199,7 @@ def get_backend_info() -> dict:
200199
"triton_available": is_triton_available(),
201200
"torch_compile_supported": is_torch_compile_supported(),
202201
"cuda_available": torch.cuda.is_available(),
203-
"cuda_device": torch.cuda.get_device_name() if torch.cuda.is_available() else None,
202+
"cuda_device": (torch.cuda.get_device_name() if torch.cuda.is_available() else None),
204203
"torch_version": torch.__version__,
205204
"env_backend": os.environ.get("ANGELSLIM_BACKEND", "auto"),
206205
"env_torch_compile": os.environ.get("ANGELSLIM_TORCH_COMPILE", "auto"),

angelslim/compressor/compressor_factory.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,7 @@ def register_class(compress_cls: Type[Any]) -> Type[Any]:
3737
"""Register a class using its own name as the key"""
3838
key = compress_cls.__name__
3939
if key in cls._compress_methods:
40-
print_info(
41-
f"Compression method '{key}' already exists, will be overwritten."
42-
)
40+
print_info(f"Compression method '{key}' already exists, will be overwritten.")
4341
cls._compress_methods[key] = compress_cls
4442
return compress_cls
4543

angelslim/compressor/diffusion/cache/cache_helper.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,7 @@ def enable(self) -> None:
4141
ValueError: Raised when both double_blocks and single_blocks are empty
4242
"""
4343
if not self.double_blocks and not self.single_blocks:
44-
raise ValueError(
45-
"At least one of double_blocks or single_blocks must be provided"
46-
)
44+
raise ValueError("At least one of double_blocks or single_blocks must be provided")
4745

4846
self.reset_states()
4947
self.wrap_modules()

angelslim/compressor/diffusion/cache/deepcache_helper.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,7 @@ def __init__(
3434
single_blocks=single_blocks,
3535
no_cache_steps=no_cache_steps,
3636
)
37-
self.no_cache_block_id = (
38-
no_cache_block_id if no_cache_block_id is not None else {}
39-
)
37+
self.no_cache_block_id = no_cache_block_id if no_cache_block_id is not None else {}
4038

4139
def is_skip(self, block_id: int, blocktype: str) -> bool:
4240
# For some pipelines, the first timestep may not be 0

angelslim/compressor/diffusion/cache/taylorcache_helper.py

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -218,12 +218,8 @@ def __init__(self, max_order: int):
218218
for i in range(max_order + 1):
219219
self.register_buffer(f"derivative_{i}_low_freqs", None, persistent=False)
220220
self.register_buffer(f"derivative_{i}_high_freqs", None, persistent=False)
221-
self.register_buffer(
222-
f"temp_derivative_{i}_low_freqs", None, persistent=False
223-
)
224-
self.register_buffer(
225-
f"temp_derivative_{i}_high_freqs", None, persistent=False
226-
)
221+
self.register_buffer(f"temp_derivative_{i}_low_freqs", None, persistent=False)
222+
self.register_buffer(f"temp_derivative_{i}_high_freqs", None, persistent=False)
227223

228224
def get_derivative(self, order: int, freqs: str) -> Optional[torch.Tensor]:
229225
return getattr(self, f"derivative_{order}_{freqs}")
@@ -265,14 +261,10 @@ def taylor_formula(self, distance: int) -> torch.Tensor:
265261
high_freqs_output = 0
266262
for i in range(len(self.get_all_filled_derivatives("low_freqs"))):
267263
coefficient = 1 / math.factorial(i)
268-
low_freqs_output += (
269-
coefficient * self.get_derivative(i, "low_freqs") * (distance**i)
270-
)
264+
low_freqs_output += coefficient * self.get_derivative(i, "low_freqs") * (distance**i)
271265
for i in range(len(self.get_all_filled_derivatives("high_freqs"))):
272266
coefficient = 1 / math.factorial(i)
273-
high_freqs_output += (
274-
coefficient * self.get_derivative(i, "high_freqs") * (distance**i)
275-
)
267+
high_freqs_output += coefficient * self.get_derivative(i, "high_freqs") * (distance**i)
276268

277269
return reconstruction(low_freqs_output, high_freqs_output)
278270

@@ -288,18 +280,16 @@ def derivatives_computation(
288280
self.set_temp_derivative(0, "high_freqs", x_high)
289281
for i in range(low_freqs_order):
290282
if self.get_derivative(i, "low_freqs") is not None:
291-
derivative_diff = self.get_temp_derivative(
283+
derivative_diff = self.get_temp_derivative(i, "low_freqs") - self.get_derivative(
292284
i, "low_freqs"
293-
) - self.get_derivative(i, "low_freqs")
285+
)
294286
self.set_temp_derivative(i + 1, "low_freqs", derivative_diff / distance)
295287
for i in range(high_freqs_order):
296288
if self.get_derivative(i, "high_freqs") is not None:
297-
derivative_diff = self.get_temp_derivative(
289+
derivative_diff = self.get_temp_derivative(i, "high_freqs") - self.get_derivative(
298290
i, "high_freqs"
299-
) - self.get_derivative(i, "high_freqs")
300-
self.set_temp_derivative(
301-
i + 1, "high_freqs", derivative_diff / distance
302291
)
292+
self.set_temp_derivative(i + 1, "high_freqs", derivative_diff / distance)
303293
self.move_temp_to_derivative()
304294

305295
def clear_temp_derivative(self) -> None:

angelslim/compressor/diffusion/cache/teacache_helper.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,7 @@ def wrapped_forward(*args, **kwargs):
8181
else:
8282
is_last_single_block = block_id == len(self.single_blocks) - 1
8383
if blocktype == "single_blocks" and is_last_single_block:
84-
img_seq_len = self.cached_output[("double_blocks", 0)][0].shape[
85-
1
86-
]
84+
img_seq_len = self.cached_output[("double_blocks", 0)][0].shape[1]
8785
cached_output = result[:, :img_seq_len, ...]
8886
self.previous_residual = cached_output - self.cached_input
8987

angelslim/compressor/diffusion/kernels/python/gemm/fp8_gemm_torch.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ def fp8_gemm_torch_block(
6767
# Dequantize A: expand scales to match tensor dimensions
6868
# a_s shape is typically [M, K//block_size]
6969
a_s_2d = a_s.view(M, -1) # [M, num_k_blocks]
70-
num_k_blocks = a_s_2d.shape[1]
7170

7271
# Dequantize by expanding scales
7372
a_dq = _dequantize_per_group(a_2d, a_s_2d, block_size, K)
@@ -121,9 +120,7 @@ def _dequantize_per_group(
121120
elif s_expanded.shape[1] < K:
122121
# Pad with last scale value
123122
pad_size = K - s_expanded.shape[1]
124-
s_expanded = torch.nn.functional.pad(
125-
s_expanded, (0, pad_size), mode="replicate"
126-
)
123+
s_expanded = torch.nn.functional.pad(s_expanded, (0, pad_size), mode="replicate")
127124

128125
return x_float * s_expanded
129126

@@ -146,7 +143,6 @@ def _dequantize_blockwise_2d(
146143
"""
147144
N, K = x.shape
148145
n_blocks, k_blocks = s.shape
149-
device = x.device
150146

151147
x_float = x.to(torch.float32)
152148
y = torch.empty_like(x_float)

0 commit comments

Comments
 (0)