Skip to content

Commit 322857a

Browse files
bdemirbBaris Demir
andauthored
Arm backend: Fix flaky INT comparisons (#18103)
cc @digantdesai @freddan80 @per @zingo @oscarandersson8218 @mansnils @Sebastian-Larsson @robell Signed-off-by: Baris Demir <baris.demir@arm.com> Co-authored-by: Baris Demir <baris.demir@arm.com>
1 parent 6821c1a commit 322857a

2 files changed

Lines changed: 49 additions & 14 deletions

File tree

backends/arm/test/ops/test_ceil.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,11 @@ def forward(self, x: torch.Tensor):
2828

2929
zeros = torch.zeros(1, 10, 10, 10)
3030
ones = torch.ones(10, 10, 10)
31-
rand = torch.rand(10, 10) - 0.5
31+
_rng = torch.Generator().manual_seed(0)
32+
# Keep values away from integer boundaries to avoid unstable ceil flips due to
33+
# tiny quantization noise, while still covering mixed-sign random data.
34+
rand_raw = torch.rand(10, 10, generator=_rng) - 0.5
35+
rand = torch.where(rand_raw >= 0, rand_raw + 0.1, rand_raw - 0.1)
3236
randn_pos = torch.randn(1, 4, 4, 4) + 10
3337
randn_neg = torch.randn(1, 4, 4, 4) - 10
3438
ramp = torch.arange(-16, 16, 0.2)

backends/arm/test/tester/analyze_output_utils.py

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -370,22 +370,35 @@ def compare_rel_frobenius_and_cosine_similarity(
370370
- Inf values will be set to max/min representable by the dtype * quantization scale
371371
- Values lower than the scale will be set to 0.0
372372
If the reference is all zeros, the function returns without testing.
373+
374+
To reduce false positives in quantized testing, the Frobenius check is
375+
skipped when reference norm is at quantization-noise scale, and a small
376+
Frobenius overflow is accepted when cosine similarity is very high.
373377
"""
374378

379+
quant_scale_for_guards: float | None = None
380+
posinf_value: float | None = None
381+
neginf_value: float | None = None
375382
if clean_reference:
376383
if quantization_parameters:
377384
scale = quantization_parameters.scale
385+
assert isinstance(
386+
scale, (torch.Tensor, int, float)
387+
), f"Unsupported quantization scale type: {type(scale)!r}"
388+
quant_scale_for_guards = (
389+
float(scale.max().item())
390+
if isinstance(scale, torch.Tensor)
391+
else float(scale)
392+
)
378393
dtype_info = torch.iinfo(quantization_parameters.dtype)
379-
_max = dtype_info.max * scale
380-
_min = dtype_info.min * scale
394+
assert quant_scale_for_guards is not None
395+
posinf_value = float(dtype_info.max) * quant_scale_for_guards
396+
neginf_value = float(dtype_info.min) * quant_scale_for_guards
381397
reference_output = reference_output.where(
382398
torch.abs(reference_output) >= scale, 0.0
383399
)
384-
else:
385-
_max = None
386-
_min = None
387400
reference_output = reference_output.nan_to_num(
388-
nan=0.0, posinf=_max, neginf=_min
401+
nan=0.0, posinf=posinf_value, neginf=neginf_value
389402
)
390403

391404
reference_all_zeros = torch.count_nonzero(reference_output).item() == 0
@@ -403,14 +416,32 @@ def compare_rel_frobenius_and_cosine_similarity(
403416
test_output.flatten(), reference_output.flatten(), dim=0
404417
).item()
405418

406-
if (
407-
frobenius_threshold is not None
408-
and relative_frobenius_error > frobenius_threshold
409-
):
410-
raise AssertionError(
411-
f"Tensor-wise comparison failed: Relative frobenius norm error {relative_frobenius_error} exceeds threshold {frobenius_threshold}."
412-
f" (Cosine similarity: {cosine_similarity}, threshold {cosine_threshold})."
419+
# Relative Frobenius is unstable when the reference norm is at quantization-noise scale.
420+
reference_numel_sqrt = reference_output.numel() ** 0.5
421+
low_norm_floor = 1e-8
422+
if quant_scale_for_guards is not None:
423+
low_norm_floor = max(
424+
low_norm_floor, quant_scale_for_guards * reference_numel_sqrt
413425
)
426+
run_frobenius_check = reference_frobenius_norm > low_norm_floor
427+
428+
if run_frobenius_check and frobenius_threshold is not None:
429+
# If cosine is very high, slightly discount Frobenius error to avoid
430+
# borderline failures dominated by quantization noise.
431+
high_cosine_floor = (
432+
max(0.98, cosine_threshold) if cosine_threshold is not None else 0.98
433+
)
434+
effective_relative_frobenius_error = relative_frobenius_error
435+
if cosine_similarity >= high_cosine_floor:
436+
effective_relative_frobenius_error = max(
437+
0.0, relative_frobenius_error - 0.02
438+
)
439+
440+
if effective_relative_frobenius_error > frobenius_threshold:
441+
raise AssertionError(
442+
f"Tensor-wise comparison failed: Relative frobenius norm error {relative_frobenius_error} exceeds threshold {frobenius_threshold}."
443+
f" (Cosine similarity: {cosine_similarity}, threshold {cosine_threshold})."
444+
)
414445

415446
if (
416447
cosine_threshold is not None

0 commit comments

Comments
 (0)