Skip to content

Commit 2837f06

Browse files
LessUp and qwencoder committed
fix: resolve CI and Pages workflow errors
- Format test_attention.py and test_gemm.py with ruff to fix CI lint check
- Add missing `require 'fileutils'` to Ruby script in pages.yml workflow

Fixes:
- CI workflow: Ruff format check failure (exit code 1)
- Pages workflow: FileUtils.mkdir_p NameError in search index generation

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
1 parent 3463c31 commit 2837f06

3 files changed

Lines changed: 37 additions & 90 deletions

File tree

.github/workflows/pages.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@ jobs:
4343
cat > build_search_index.rb << 'RUBY'
4444
require 'json'
4545
require 'yaml'
46+
require 'fileutils'
4647
4748
def extract_front_matter(content)
4849
if content =~ /\A---\s*\n(.*?)\n---\s*\n/m

tests/test_attention.py

Lines changed: 30 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -49,15 +49,9 @@ def test_attention_correctness_fp32(self, batch, heads, seq_len, head_dim, devic
4949
pytest.skip("CUDA kernels not built")
5050

5151
# Generate inputs
52-
q = torch.randn(
53-
batch, heads, seq_len, head_dim, device=device, dtype=torch.float32
54-
)
55-
k = torch.randn(
56-
batch, heads, seq_len, head_dim, device=device, dtype=torch.float32
57-
)
58-
v = torch.randn(
59-
batch, heads, seq_len, head_dim, device=device, dtype=torch.float32
60-
)
52+
q = torch.randn(batch, heads, seq_len, head_dim, device=device, dtype=torch.float32)
53+
k = torch.randn(batch, heads, seq_len, head_dim, device=device, dtype=torch.float32)
54+
v = torch.randn(batch, heads, seq_len, head_dim, device=device, dtype=torch.float32)
6155

6256
# Compute outputs
6357
output = naive_attention(q, k, v)
@@ -92,15 +86,9 @@ def test_attention_correctness_fp16(self, batch, heads, seq_len, head_dim, devic
9286
except ImportError:
9387
pytest.skip("CUDA kernels not built")
9488

95-
q = torch.randn(
96-
batch, heads, seq_len, head_dim, device=device, dtype=torch.float16
97-
)
98-
k = torch.randn(
99-
batch, heads, seq_len, head_dim, device=device, dtype=torch.float16
100-
)
101-
v = torch.randn(
102-
batch, heads, seq_len, head_dim, device=device, dtype=torch.float16
103-
)
89+
q = torch.randn(batch, heads, seq_len, head_dim, device=device, dtype=torch.float16)
90+
k = torch.randn(batch, heads, seq_len, head_dim, device=device, dtype=torch.float16)
91+
v = torch.randn(batch, heads, seq_len, head_dim, device=device, dtype=torch.float16)
10492

10593
output = naive_attention(q, k, v)
10694
reference, _ = compute_attention_reference(q, k, v)
@@ -149,17 +137,16 @@ def test_softmax_invariants(self, batch, heads, seq_len, head_dim, device):
149137

150138
# Property 2: Sum equals 1
151139
row_sums = softmax_output.sum(dim=-1)
152-
assert torch.allclose(
153-
row_sums, torch.ones_like(row_sums), rtol=1e-5, atol=1e-5
154-
), "Softmax row sums should equal 1"
140+
assert torch.allclose(row_sums, torch.ones_like(row_sums), rtol=1e-5, atol=1e-5), (
141+
"Softmax row sums should equal 1"
142+
)
155143

156144
# Property 3: Monotonicity (larger input -> larger output)
157145
for i in range(min(5, seq_len - 1)): # Check a few pairs
158146
idx1, idx2 = i, i + 1
159147
mask = scores[..., idx1] > scores[..., idx2]
160148
assert (
161-
softmax_output[..., idx1][mask]
162-
>= softmax_output[..., idx2][mask] - 1e-6
149+
softmax_output[..., idx1][mask] >= softmax_output[..., idx2][mask] - 1e-6
163150
).all(), "Softmax should preserve relative order"
164151

165152

@@ -189,15 +176,9 @@ def test_flash_attention_consistency(self, batch, heads, seq_len, head_dim, devi
189176
except ImportError:
190177
pytest.skip("CUDA kernels not built")
191178

192-
q = torch.randn(
193-
batch, heads, seq_len, head_dim, device=device, dtype=torch.float32
194-
)
195-
k = torch.randn(
196-
batch, heads, seq_len, head_dim, device=device, dtype=torch.float32
197-
)
198-
v = torch.randn(
199-
batch, heads, seq_len, head_dim, device=device, dtype=torch.float32
200-
)
179+
q = torch.randn(batch, heads, seq_len, head_dim, device=device, dtype=torch.float32)
180+
k = torch.randn(batch, heads, seq_len, head_dim, device=device, dtype=torch.float32)
181+
v = torch.randn(batch, heads, seq_len, head_dim, device=device, dtype=torch.float32)
201182

202183
flash_output = flash_attention(q, k, v)
203184
naive_output = naive_attention(q, k, v)
@@ -233,15 +214,9 @@ def test_causal_mask_correctness(self, batch, heads, seq_len, head_dim, device):
233214
except ImportError:
234215
pytest.skip("CUDA kernels not built")
235216

236-
q = torch.randn(
237-
batch, heads, seq_len, head_dim, device=device, dtype=torch.float32
238-
)
239-
k = torch.randn(
240-
batch, heads, seq_len, head_dim, device=device, dtype=torch.float32
241-
)
242-
v = torch.randn(
243-
batch, heads, seq_len, head_dim, device=device, dtype=torch.float32
244-
)
217+
q = torch.randn(batch, heads, seq_len, head_dim, device=device, dtype=torch.float32)
218+
k = torch.randn(batch, heads, seq_len, head_dim, device=device, dtype=torch.float32)
219+
v = torch.randn(batch, heads, seq_len, head_dim, device=device, dtype=torch.float32)
245220

246221
# Compute causal attention
247222
causal_output = flash_attention(q, k, v, is_causal=True)
@@ -260,9 +235,9 @@ def test_causal_mask_correctness(self, batch, heads, seq_len, head_dim, device):
260235

261236
# Verify attention weights are lower triangular
262237
upper_triangle = torch.triu(attn_weights, diagonal=1)
263-
assert torch.allclose(
264-
upper_triangle, torch.zeros_like(upper_triangle), atol=1e-6
265-
), "Causal attention weights should be lower triangular"
238+
assert torch.allclose(upper_triangle, torch.zeros_like(upper_triangle), atol=1e-6), (
239+
"Causal attention weights should be lower triangular"
240+
)
266241

267242

268243
class TestTiledAttention:
@@ -290,15 +265,9 @@ def test_tiled_attention_consistency(self, batch, heads, seq_len, head_dim, devi
290265
except ImportError:
291266
pytest.skip("CUDA kernels not built")
292267

293-
q = torch.randn(
294-
batch, heads, seq_len, head_dim, device=device, dtype=torch.float32
295-
)
296-
k = torch.randn(
297-
batch, heads, seq_len, head_dim, device=device, dtype=torch.float32
298-
)
299-
v = torch.randn(
300-
batch, heads, seq_len, head_dim, device=device, dtype=torch.float32
301-
)
268+
q = torch.randn(batch, heads, seq_len, head_dim, device=device, dtype=torch.float32)
269+
k = torch.randn(batch, heads, seq_len, head_dim, device=device, dtype=torch.float32)
270+
v = torch.randn(batch, heads, seq_len, head_dim, device=device, dtype=torch.float32)
302271

303272
tiled_output = tiled_attention(q, k, v)
304273
naive_output = naive_attention(q, k, v)
@@ -332,15 +301,9 @@ def test_tiled_attention_correctness_fp16(self, batch, heads, seq_len, head_dim,
332301
except ImportError:
333302
pytest.skip("CUDA kernels not built")
334303

335-
q = torch.randn(
336-
batch, heads, seq_len, head_dim, device=device, dtype=torch.float16
337-
)
338-
k = torch.randn(
339-
batch, heads, seq_len, head_dim, device=device, dtype=torch.float16
340-
)
341-
v = torch.randn(
342-
batch, heads, seq_len, head_dim, device=device, dtype=torch.float16
343-
)
304+
q = torch.randn(batch, heads, seq_len, head_dim, device=device, dtype=torch.float16)
305+
k = torch.randn(batch, heads, seq_len, head_dim, device=device, dtype=torch.float16)
306+
v = torch.randn(batch, heads, seq_len, head_dim, device=device, dtype=torch.float16)
344307

345308
output = tiled_attention(q, k, v)
346309
reference, _ = compute_attention_reference(q, k, v)
@@ -373,8 +336,9 @@ def test_tiled_attention_scale_parameter(self, device):
373336
output_custom = tiled_attention(q, k, v, scale=custom_scale)
374337

375338
# Outputs should be different
376-
assert not torch.allclose(output_default, output_custom), \
339+
assert not torch.allclose(output_default, output_custom), (
377340
"Custom scale should produce different output"
341+
)
378342

379343

380344
class TestNaiveAttentionErrorHandling:
@@ -644,15 +608,9 @@ def test_batch_multihead_support(self, batch, heads, seq_len, head_dim, device):
644608
except ImportError:
645609
pytest.skip("CUDA kernels not built")
646610

647-
q = torch.randn(
648-
batch, heads, seq_len, head_dim, device=device, dtype=torch.float32
649-
)
650-
k = torch.randn(
651-
batch, heads, seq_len, head_dim, device=device, dtype=torch.float32
652-
)
653-
v = torch.randn(
654-
batch, heads, seq_len, head_dim, device=device, dtype=torch.float32
655-
)
611+
q = torch.randn(batch, heads, seq_len, head_dim, device=device, dtype=torch.float32)
612+
k = torch.randn(batch, heads, seq_len, head_dim, device=device, dtype=torch.float32)
613+
v = torch.randn(batch, heads, seq_len, head_dim, device=device, dtype=torch.float32)
656614

657615
output = flash_attention(q, k, v)
658616

tests/test_gemm.py

Lines changed: 6 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -304,9 +304,7 @@ def test_unaligned_dimensions(self, M, N, K, device):
304304
output = gemm(a, b)
305305
reference = torch.matmul(a, b)
306306

307-
assert output.shape == (M, N), (
308-
f"Output shape mismatch: {output.shape} vs {(M, N)}"
309-
)
307+
assert output.shape == (M, N), f"Output shape mismatch: {output.shape} vs {(M, N)}"
310308
assert_close(
311309
output,
312310
reference,
@@ -390,9 +388,7 @@ def test_int8_gemm_correctness(self, M, N, K, device):
390388
reference = torch.matmul(a.to(torch.int32), b.to(torch.int32))
391389

392390
assert output.dtype == torch.int32, f"Expected INT32 output, got {output.dtype}"
393-
assert output.shape == (M, N), (
394-
f"Output shape mismatch: {output.shape} vs {(M, N)}"
395-
)
391+
assert output.shape == (M, N), f"Output shape mismatch: {output.shape} vs {(M, N)}"
396392
assert torch.equal(output, reference), (
397393
f"INT8 GEMM mismatch. Max diff: {(output - reference).abs().max().item()}"
398394
)
@@ -485,9 +481,7 @@ def test_tall_skinny_matrices(self, device):
485481
output = gemm(a, b)
486482
reference = torch.matmul(a, b)
487483

488-
assert_close(
489-
output, reference, rtol=1e-3, atol=1e-3, msg="Tall-skinny GEMM failed"
490-
)
484+
assert_close(output, reference, rtol=1e-3, atol=1e-3, msg="Tall-skinny GEMM failed")
491485

492486
@pytest.mark.cuda
493487
def test_alpha_beta_scaling(self, device):
@@ -506,9 +500,7 @@ def test_alpha_beta_scaling(self, device):
506500
output = gemm(a, b, alpha=alpha)
507501
reference = alpha * torch.matmul(a, b)
508502

509-
assert_close(
510-
output, reference, rtol=1e-3, atol=1e-3, msg="Alpha scaling failed"
511-
)
503+
assert_close(output, reference, rtol=1e-3, atol=1e-3, msg="Alpha scaling failed")
512504

513505
@pytest.mark.cuda
514506
def test_beta_parameter(self, device):
@@ -525,9 +517,7 @@ def test_beta_parameter(self, device):
525517
# Test with beta=0 (default, should be same as just alpha * A @ B)
526518
output_beta0 = gemm(a, b, alpha=1.0, beta=0.0)
527519
reference = torch.matmul(a, b)
528-
assert_close(
529-
output_beta0, reference, rtol=1e-3, atol=1e-3, msg="Beta=0 failed"
530-
)
520+
assert_close(output_beta0, reference, rtol=1e-3, atol=1e-3, msg="Beta=0 failed")
531521

532522
@pytest.mark.cuda
533523
def test_combined_alpha_beta(self, device):
@@ -547,6 +537,4 @@ def test_combined_alpha_beta(self, device):
547537
output = gemm(a, b, alpha=alpha, beta=beta)
548538
# Note: beta is currently unused in the implementation, so output should just be alpha * A @ B
549539
reference = alpha * torch.matmul(a, b)
550-
assert_close(
551-
output, reference, rtol=1e-3, atol=1e-3, msg="Combined alpha/beta failed"
552-
)
540+
assert_close(output, reference, rtol=1e-3, atol=1e-3, msg="Combined alpha/beta failed")

0 commit comments

Comments
 (0)