@@ -6529,70 +6529,55 @@ def test_qwen2_5(self):
65296529
65306530
65316531class TestExampleMultimodalityScript (TestQNN ):
6532- def test_smolvlm_500m_instruct (self ):
6533- if not self .required_envs ():
6534- self .skipTest ("missing required envs" )
65356532
6536- prompt = "Can you describe this image?"
6537- cmds = [
6538- "python" ,
6539- f"{ self .executorch_root } /examples/qualcomm/oss_scripts/llama/llama.py" ,
6540- "--artifact" ,
6541- self .artifact_dir ,
6542- "--build_folder" ,
6543- self .build_folder ,
6544- "--model" ,
6545- self .model ,
6546- "--ip" ,
6547- self .ip ,
6548- "--port" ,
6549- str (self .port ),
6550- "--prompt" ,
6551- prompt ,
6552- "--temperature" ,
6553- "0" ,
6554- "--decoder_model" ,
6555- "smolvlm_500m_instruct" ,
6556- "--model_mode" ,
6557- "kv" ,
6558- "--max_seq_len" ,
6559- "128" ,
6560- ]
6561- if self .compile_only :
6562- cmds .extend (["--compile_only" ])
6563- elif self .device :
6564- cmds .extend (["--device" , self .device ])
6565- if self .host :
6566- cmds .extend (["--host" , self .host ])
6567- elif self .enable_x86_64 :
6568- cmds .extend (["--enable_x86_64" ])
6569- if self .pre_gen_pte :
6570- cmds .extend (["--pre_gen_pte" , self .pre_gen_pte ])
@dataclass(frozen=True)
class MLLMSpecs:
    """Immutable per-model expectations checked by the multimodal tests.

    The ``*_token_rate`` fields are looked up dynamically by
    ``test_static_vlm`` as ``f"{self.model.lower()}_token_rate"`` and used
    as a lower bound on the reported ``inference_speed``. The
    ``*_pte_size`` fields are upper bounds on the reported PTE sizes.
    """

    # Forwarded to the script as the ``--max_seq_len`` argument.
    max_seq_len: int
    # Minimum acceptable inference speed, in tokens/second, per SoC model.
    sm8650_token_rate: float
    sm8750_token_rate: float
    # Maximum acceptable PTE sizes, in bytes.
    encoder_pte_size: float
    text_embedding_pte_size: float
    decoder_pte_size: float
65716541
6572- p = subprocess .Popen (cmds , stdout = subprocess .DEVNULL )
6573- with Listener ((self .ip , self .port )) as listener :
6574- conn = listener .accept ()
6575- p .communicate ()
6576- msg = json .loads (conn .recv ())
6577- if "Error" in msg :
6578- self .fail (msg ["Error" ])
6579- else :
6580- if not self .enable_x86_64 :
6581- encoder_pte_size = msg ["encoder_pte_size" ]
6582- text_embedding_pte_size = msg ["text_embedding_pte_size" ]
6583- decoder_pte_size = msg ["pte_size" ]
6584- self .assertLessEqual (encoder_pte_size , 110_000_000 ) # 110MB
6585- self .assertLessEqual (text_embedding_pte_size , 100_000_000 ) # 100MB
6586- self .assertLessEqual (decoder_pte_size , 400_000_000 ) # 400MB
6587- print (f"Encoder PTE Size: { encoder_pte_size } bytes" )
6588- print (f"Text Embedding PTE Size: { text_embedding_pte_size } bytes" )
6589- print (f"Decoder PTE Size: { decoder_pte_size } bytes" )
@dataclass(frozen=True)
class VLMSpecs(MLLMSpecs):
    """Expectations for a vision-language model: base specs plus an image check."""

    # Image handed to the script through ``--image_path``.
    image_path: str
    # Substring that ``test_static_vlm`` expects to find in the model's answer.
    golden_image_feature: str
65906546
6591- def test_internvl3_1b (self ):
6592- if not self .required_envs ():
# TODO: refactor to support different backends
def setUp(self):
    """Build ``self.vlm_specs``, the expectation table keyed by model name.

    ``test_static_vlm`` indexes this table with ``self.model_name`` to get
    the sequence length, size limits, token-rate thresholds, input image
    and expected answer keyword for the model under test.
    """
    spec_cls = TestExampleMultimodalityScript.VLMSpecs

    smolvlm_spec = spec_cls(
        max_seq_len=128,
        sm8650_token_rate=50,
        sm8750_token_rate=55,
        encoder_pte_size=110_000_000,  # 110MB
        text_embedding_pte_size=100_000_000,  # 100MB
        decoder_pte_size=400_000_000,  # 400MB
        # New York Bay
        image_path="https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
        golden_image_feature="city",
    )
    internvl3_spec = spec_cls(
        max_seq_len=320,
        sm8650_token_rate=11,
        sm8750_token_rate=13,
        encoder_pte_size=425_000_000,  # 425MB
        text_embedding_pte_size=300_000_000,  # 300MB
        decoder_pte_size=550_000_000,  # 550MB
        # Two cats lying on a blanket
        image_path="http://images.cocodataset.org/val2017/000000039769.jpg",
        golden_image_feature="cats",
    )

    self.vlm_specs = {
        "smolvlm_500m_instruct": smolvlm_spec,
        "internvl3_1b": internvl3_spec,
    }
6571+
6572+ def test_static_vlm (self ):
6573+ if not self .required_envs ([self .model_name ]):
65936574 self .skipTest ("missing required envs" )
65946575
6576+ vlm_specs : TestExampleMultimodalityScript .VLMSpecs = self .vlm_specs [
6577+ self .model_name
6578+ ]
65956579 prompt = "Can you describe this image?"
6580+ image_path = vlm_specs .image_path
65966581 cmds = [
65976582 "python" ,
65986583 f"{ self .executorch_root } /examples/qualcomm/oss_scripts/llama/llama.py" ,
@@ -6608,14 +6593,16 @@ def test_internvl3_1b(self):
66086593 str (self .port ),
66096594 "--prompt" ,
66106595 prompt ,
6596+ "--image_path" ,
6597+ image_path ,
66116598 "--temperature" ,
66126599 "0" ,
66136600 "--decoder_model" ,
6614- "internvl3_1b " ,
6601+ f" { self . model_name } " ,
66156602 "--model_mode" ,
66166603 "kv" ,
66176604 "--max_seq_len" ,
6618- "320 " ,
6605+ f" { vlm_specs . max_seq_len } " ,
66196606 ]
66206607 if self .compile_only :
66216608 cmds .extend (["--compile_only" ])
@@ -6636,17 +6623,41 @@ def test_internvl3_1b(self):
66366623 if "Error" in msg :
66376624 self .fail (msg ["Error" ])
66386625 else :
6626+ if not self .compile_only :
6627+ model_out = msg ["result" ][0 ]
6628+ self .assertTrue (
6629+ vlm_specs .golden_image_feature in model_out ,
6630+ f"Expected Output contains feature: '{ vlm_specs .golden_image_feature } ' Actual Output: '{ model_out } '" ,
6631+ )
6632+ print (f"Image Path: { image_path } " )
6633+ print (f"Query: { prompt } " )
6634+ print (f"Answer: { model_out } " )
66396635 if not self .enable_x86_64 :
66406636 encoder_pte_size = msg ["encoder_pte_size" ]
66416637 text_embedding_pte_size = msg ["text_embedding_pte_size" ]
66426638 decoder_pte_size = msg ["pte_size" ]
6643- self .assertLessEqual (encoder_pte_size , 425_000_000 ) # 425MB
6644- self .assertLessEqual (text_embedding_pte_size , 300_000_000 ) # 300MB
6645- self .assertLessEqual (decoder_pte_size , 550_000_000 ) # 550MB
6639+ self .assertLessEqual (encoder_pte_size , vlm_specs .encoder_pte_size )
6640+ self .assertLessEqual (
6641+ text_embedding_pte_size , vlm_specs .text_embedding_pte_size
6642+ )
6643+ self .assertLessEqual (decoder_pte_size , vlm_specs .decoder_pte_size )
66466644 print (f"Encoder PTE Size: { encoder_pte_size } bytes" )
66476645 print (f"Text Embedding PTE Size: { text_embedding_pte_size } bytes" )
66486646 print (f"Decoder PTE Size: { decoder_pte_size } bytes" )
66496647
6648+ attr_name = f"{ self .model .lower ()} _token_rate"
6649+ if (
6650+ not self .compile_only
6651+ and not self .enable_x86_64
6652+ and hasattr (vlm_specs , attr_name )
6653+ ):
6654+ device_inference_speed = msg ["inference_speed" ]
6655+ expected_inference_speed = getattr (vlm_specs , attr_name )
6656+ print (f"Prompt Evaluation: { device_inference_speed } tokens/second" )
6657+ self .assertGreaterEqual (
6658+ device_inference_speed , expected_inference_speed
6659+ )
6660+
66506661
66516662class TestExampleOssScript (TestQNN ):
66526663 def test_albert (self ):
0 commit comments