
Commit d6fa91b

Copilot and anxiangsir committed

Add direct class instantiation tests for OneVisionEncoderModel

Added a TestDirectClassInstantiation class with tests that directly instantiate OneVisionEncoderModel (without AutoModel) to verify output consistency across transformers versions:

- test_direct_instantiation_flash_attention
- test_direct_instantiation_eager_attention
- test_direct_instantiation_flash_vs_eager_consistency
- test_direct_instantiation_deterministic_output
- test_direct_instantiation_bfloat16

Co-authored-by: anxiangsir <31175974+anxiangsir@users.noreply.github.com>

1 parent 36009ad, commit d6fa91b

1 file changed: tests/test_automodel_consistency.py (309 additions, 0 deletions)
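As background, the pattern these tests exercise is loading the encoder through its concrete class rather than through AutoModel dispatch. A minimal sketch of that pattern, with the model id and calls taken from the diff below (a CUDA device and a PIL image named image are assumed):

import torch
from transformers import AutoImageProcessor
from onevision_encoder import OneVisionEncoderModel

# Instantiate the concrete class directly; no AutoModel registry lookup is involved.
model = OneVisionEncoderModel.from_pretrained(
    "lmms-lab-encoder/onevision-encoder-large",
    trust_remote_code=True,
    attn_implementation="flash_attention_2",
).to("cuda").eval()

preprocessor = AutoImageProcessor.from_pretrained(
    "lmms-lab-encoder/onevision-encoder-large", trust_remote_code=True
)
pixel_values = preprocessor(images=image, return_tensors="pt")["pixel_values"].to("cuda")

with torch.no_grad():
    output = model(pixel_values)  # output.last_hidden_state holds the encoded features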
@@ -316,6 +316,315 @@ def test_automodel_vs_onevision_encoder_model_dtype_consistency(
        torch.cuda.empty_cache()


class TestDirectClassInstantiation:
    """
    Tests for output consistency using direct class instantiation.

    These tests do NOT use AutoModel - they directly instantiate the
    OneVisionEncoderModel class to verify output consistency across
    different transformers versions.
    """

    @pytest.fixture
    def test_image(self):
        """Create a sample test image."""
        return create_test_image()

    @pytest.fixture
    def model_name(self):
        """Model name for loading from HuggingFace."""
        return "lmms-lab-encoder/onevision-encoder-large"

    @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
    def test_direct_instantiation_flash_attention(self, test_image, model_name):
        """
        Test direct class instantiation with flash_attention_2.

        This test directly uses OneVisionEncoderModel.from_pretrained()
        without AutoModel to verify output consistency.
        """
        from transformers import AutoImageProcessor
        from onevision_encoder import OneVisionEncoderModel

        current_version = get_current_transformers_version()
        print(f"\nRunning direct instantiation test with transformers version: {current_version}")

        # Load model directly using the OneVisionEncoderModel class
        model = OneVisionEncoderModel.from_pretrained(
            model_name,
            trust_remote_code=True,
            attn_implementation="flash_attention_2"
        ).to("cuda").eval()

        # Load preprocessor
        preprocessor = AutoImageProcessor.from_pretrained(
            model_name,
            trust_remote_code=True
        )

        # Preprocess image
        inputs = preprocessor(images=test_image, return_tensors="pt")
        pixel_values = inputs["pixel_values"].to("cuda")

        # Run inference
        with torch.no_grad():
            with torch.amp.autocast(dtype=torch.bfloat16, device_type="cuda"):
                output = model(pixel_values)

        # Verify output shape and values
        assert output.last_hidden_state is not None, "last_hidden_state should not be None"
        assert output.last_hidden_state.shape[0] == 1, "Batch size should be 1"
        assert not torch.isnan(output.last_hidden_state).any(), "Output contains NaN values"
        assert not torch.isinf(output.last_hidden_state).any(), "Output contains Inf values"

        print(f"Output shape: {output.last_hidden_state.shape}")
        print(f"Output stats: min={output.last_hidden_state.min().item():.4f}, "
              f"max={output.last_hidden_state.max().item():.4f}, "
              f"mean={output.last_hidden_state.mean().item():.4f}")

        # Clean up
        del model
        torch.cuda.empty_cache()

    @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
    def test_direct_instantiation_eager_attention(self, test_image, model_name):
        """
        Test direct class instantiation with eager attention.

        This test directly uses OneVisionEncoderModel.from_pretrained()
        with the eager attention implementation.
        """
        from transformers import AutoImageProcessor
        from onevision_encoder import OneVisionEncoderModel

        current_version = get_current_transformers_version()
        print(f"\nRunning direct instantiation (eager) test with transformers version: {current_version}")

        # Load model directly using the OneVisionEncoderModel class with eager attention
        model = OneVisionEncoderModel.from_pretrained(
            model_name,
            trust_remote_code=True,
            attn_implementation="eager"
        ).to("cuda").eval()

        # Load preprocessor
        preprocessor = AutoImageProcessor.from_pretrained(
            model_name,
            trust_remote_code=True
        )

        # Preprocess image
        inputs = preprocessor(images=test_image, return_tensors="pt")
        pixel_values = inputs["pixel_values"].to("cuda")

        # Run inference
        with torch.no_grad():
            output = model(pixel_values)

        # Verify output shape and values
        assert output.last_hidden_state is not None, "last_hidden_state should not be None"
        assert output.last_hidden_state.shape[0] == 1, "Batch size should be 1"
        assert not torch.isnan(output.last_hidden_state).any(), "Output contains NaN values"
        assert not torch.isinf(output.last_hidden_state).any(), "Output contains Inf values"

        print(f"Output shape: {output.last_hidden_state.shape}")
        print(f"Output stats: min={output.last_hidden_state.min().item():.4f}, "
              f"max={output.last_hidden_state.max().item():.4f}, "
              f"mean={output.last_hidden_state.mean().item():.4f}")

        # Clean up
        del model
        torch.cuda.empty_cache()

    @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
    def test_direct_instantiation_flash_vs_eager_consistency(self, test_image, model_name):
        """
        Test output consistency between flash_attention_2 and eager attention.

        This test directly instantiates two models with different attention
        implementations and compares their outputs to verify consistency.
        """
        from transformers import AutoImageProcessor
        from onevision_encoder import OneVisionEncoderModel

        current_version = get_current_transformers_version()
        print(f"\nRunning flash vs eager consistency test with transformers version: {current_version}")

        # Load model with flash_attention_2
        model_flash = OneVisionEncoderModel.from_pretrained(
            model_name,
            trust_remote_code=True,
            attn_implementation="flash_attention_2"
        ).to("cuda").eval()

        # Load model with eager attention
        model_eager = OneVisionEncoderModel.from_pretrained(
            model_name,
            trust_remote_code=True,
            attn_implementation="eager"
        ).to("cuda").eval()

        # Load preprocessor
        preprocessor = AutoImageProcessor.from_pretrained(
            model_name,
            trust_remote_code=True
        )

        # Preprocess image
        inputs = preprocessor(images=test_image, return_tensors="pt")
        pixel_values = inputs["pixel_values"].to("cuda")

        # Run inference with both models
        with torch.no_grad():
            with torch.amp.autocast(dtype=torch.bfloat16, device_type="cuda"):
                output_flash = model_flash(pixel_values)
                output_eager = model_eager(pixel_values)

        # Compare shapes
        assert output_flash.last_hidden_state.shape == output_eager.last_hidden_state.shape, (
            f"Shape mismatch: flash={output_flash.last_hidden_state.shape}, "
            f"eager={output_eager.last_hidden_state.shape}"
        )

        # Compare outputs (allow some tolerance due to the different implementations)
        max_diff = (output_flash.last_hidden_state - output_eager.last_hidden_state).abs().max().item()
        mean_diff = (output_flash.last_hidden_state - output_eager.last_hidden_state).abs().mean().item()

        print("Flash vs Eager comparison:")
        print(f"  Max difference: {max_diff:.6f}")
        print(f"  Mean difference: {mean_diff:.6f}")

        # Flash and eager attention produce similar but not identical results due to:
        # 1. Different numerical algorithms (FlashAttention uses online softmax)
        # 2. bfloat16 precision limitations with autocast
        # 3. Different memory access patterns affecting floating-point accumulation
        # A tolerance of 1e-2 is appropriate for this comparison.
        FLASH_EAGER_RTOL = 1e-2
        FLASH_EAGER_ATOL = 1e-2
        is_close = torch.allclose(
            output_flash.last_hidden_state,
            output_eager.last_hidden_state,
            rtol=FLASH_EAGER_RTOL,
            atol=FLASH_EAGER_ATOL
        )

        if not is_close:
            pytest.fail(
                f"Output mismatch between flash_attention_2 and eager!\n"
                f"Max difference: {max_diff}\n"
                f"Mean difference: {mean_diff}\n"
                f"Flash stats: min={output_flash.last_hidden_state.min()}, max={output_flash.last_hidden_state.max()}\n"
                f"Eager stats: min={output_eager.last_hidden_state.min()}, max={output_eager.last_hidden_state.max()}"
            )

        # Clean up
        del model_flash, model_eager
        torch.cuda.empty_cache()

    @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
    def test_direct_instantiation_deterministic_output(self, test_image, model_name):
        """
        Test that direct class instantiation produces deterministic outputs.

        Running the same model twice with the same input should produce
        identical outputs.
        """
        from transformers import AutoImageProcessor
        from onevision_encoder import OneVisionEncoderModel

        current_version = get_current_transformers_version()
        print(f"\nRunning deterministic output test with transformers version: {current_version}")

        # Load model
        model = OneVisionEncoderModel.from_pretrained(
            model_name,
            trust_remote_code=True,
            attn_implementation="flash_attention_2"
        ).to("cuda").eval()

        # Load preprocessor
        preprocessor = AutoImageProcessor.from_pretrained(
            model_name,
            trust_remote_code=True
        )

        # Preprocess image
        inputs = preprocessor(images=test_image, return_tensors="pt")
        pixel_values = inputs["pixel_values"].to("cuda")

        # Run inference twice
        # Note: using autocast with bfloat16 in eval mode should be deterministic,
        # since dropout is disabled and no stochastic operations are performed
        with torch.no_grad():
            with torch.amp.autocast(dtype=torch.bfloat16, device_type="cuda"):
                output1 = model(pixel_values)
                output2 = model(pixel_values)

        # Outputs should be identical
        is_identical = torch.equal(output1.last_hidden_state, output2.last_hidden_state)

        if not is_identical:
            max_diff = (output1.last_hidden_state - output2.last_hidden_state).abs().max().item()
            pytest.fail(
                f"Non-deterministic output detected!\n"
                f"Max difference between two runs: {max_diff}"
            )

        print("Deterministic output verified: two identical runs produce identical outputs")

        # Clean up
        del model
        torch.cuda.empty_cache()

    @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
    def test_direct_instantiation_bfloat16(self, test_image, model_name):
        """
        Test direct class instantiation with the bfloat16 dtype.
        """
        from transformers import AutoImageProcessor
        from onevision_encoder import OneVisionEncoderModel

        current_version = get_current_transformers_version()
        print(f"\nRunning bfloat16 test with transformers version: {current_version}")

        # Load model with bfloat16
        model = OneVisionEncoderModel.from_pretrained(
            model_name,
            trust_remote_code=True,
            attn_implementation="flash_attention_2",
            torch_dtype=torch.bfloat16
        ).to("cuda").eval()

        # Load preprocessor
        preprocessor = AutoImageProcessor.from_pretrained(
            model_name,
            trust_remote_code=True
        )

        # Preprocess image
        inputs = preprocessor(images=test_image, return_tensors="pt")
        pixel_values = inputs["pixel_values"].to("cuda", dtype=torch.bfloat16)

        # Run inference
        with torch.no_grad():
            output = model(pixel_values)

        # Verify dtype
        assert output.last_hidden_state.dtype == torch.bfloat16, (
            f"Expected bfloat16 output, got: {output.last_hidden_state.dtype}"
        )

        # Verify no NaN/Inf
        assert not torch.isnan(output.last_hidden_state).any(), "Output contains NaN values"
        assert not torch.isinf(output.last_hidden_state).any(), "Output contains Inf values"

        print(f"bfloat16 output verified: dtype={output.last_hidden_state.dtype}")
        print(f"Output shape: {output.last_hidden_state.shape}")

        # Clean up
        del model
        torch.cuda.empty_cache()


class TestTransformersVersionInfo:
    """Test class to document and verify transformers version information."""
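The new tests can be run on their own with pytest's standard -k filter, for example (a CUDA GPU is required, and the flash_attention_2 paths additionally assume the flash-attn package is installed):

pytest tests/test_automodel_consistency.py -k TestDirectClassInstantiation -v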
