Skip to content

Commit 04357f5

Browse files
committed
feat: add mixed precision support for model output consistency tests
1 parent 76af785 commit 04357f5

1 file changed

Lines changed: 3 additions & 72 deletions

File tree

tests/test_automodel_consistency.py

Lines changed: 3 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,9 @@ def test_automodel_vs_onevision_encoder_model_output_consistency(
115115

116116
# Run inference with both models
117117
with torch.no_grad():
118-
auto_output = auto_model(pixel_values)
119-
onevision_output = onevision_model(pixel_values)
118+
with torch.amp.autocast(dtype=torch.bfloat16, device_type="cuda"):
119+
auto_output = auto_model(pixel_values)
120+
onevision_output = onevision_model(pixel_values)
120121

121122
# Compare last_hidden_state
122123
assert auto_output.last_hidden_state.shape == onevision_output.last_hidden_state.shape, (
@@ -239,76 +240,6 @@ def test_automodel_vs_onevision_encoder_model_eager_attention(
239240
del auto_model, onevision_model
240241
torch.cuda.empty_cache()
241242

242-
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
243-
def test_automodel_vs_onevision_encoder_model_batch_input(
244-
self, model_name
245-
):
246-
"""
247-
Test output consistency with batched input.
248-
249-
This ensures both loading methods handle batch processing identically.
250-
"""
251-
from transformers import AutoModel, AutoImageProcessor
252-
253-
# Log transformers version
254-
current_version = get_current_transformers_version()
255-
print(f"\nRunning test with transformers version: {current_version}")
256-
257-
# Create multiple test images
258-
images = [create_test_image(seed=i) for i in range(3)]
259-
260-
# Load models
261-
auto_model = AutoModel.from_pretrained(
262-
model_name,
263-
trust_remote_code=True,
264-
attn_implementation="flash_attention_2"
265-
).to("cuda").eval()
266-
267-
onevision_model = OneVisionEncoderModel.from_pretrained(
268-
model_name,
269-
trust_remote_code=True,
270-
attn_implementation="flash_attention_2"
271-
).to("cuda").eval()
272-
273-
# Load preprocessor
274-
preprocessor = AutoImageProcessor.from_pretrained(
275-
model_name,
276-
trust_remote_code=True
277-
)
278-
279-
# Preprocess batch of images
280-
inputs = preprocessor(images=images, return_tensors="pt")
281-
pixel_values = inputs["pixel_values"].to("cuda")
282-
283-
# Run inference
284-
with torch.no_grad():
285-
auto_output = auto_model(pixel_values)
286-
onevision_output = onevision_model(pixel_values)
287-
288-
# Compare outputs
289-
assert auto_output.last_hidden_state.shape[0] == len(images), (
290-
f"Expected batch size {len(images)}, got {auto_output.last_hidden_state.shape[0]}"
291-
)
292-
293-
is_close = torch.allclose(
294-
auto_output.last_hidden_state,
295-
onevision_output.last_hidden_state,
296-
rtol=1e-4,
297-
atol=1e-4
298-
)
299-
300-
if not is_close:
301-
max_diff = (
302-
auto_output.last_hidden_state - onevision_output.last_hidden_state
303-
).abs().max().item()
304-
pytest.fail(
305-
f"Batch output mismatch!\n"
306-
f"Max difference: {max_diff}"
307-
)
308-
309-
# Clean up
310-
del auto_model, onevision_model
311-
torch.cuda.empty_cache()
312243

313244
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
314245
def test_automodel_vs_onevision_encoder_model_dtype_consistency(

0 commit comments

Comments
 (0)