Skip to content

Commit cdb8db3

Browse files
authored
Merge branch 'pytorch:main' into main
2 parents ba43422 + 06f10b9 commit cdb8db3

36 files changed

Lines changed: 3860 additions & 269 deletions

backends/arm/scripts/collect_testname_resources.py

Lines changed: 14 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -132,34 +132,19 @@ def _collect_arm_models(models_md: pathlib.Path) -> set[str]:
132132
return models
133133

134134

135-
def _collect_arm_ops() -> set[str]:
136-
"""
137-
Returns a mapping from names on the form to be used in unittests to edge op:
138-
1. Names are in lowercase.
139-
2. Overload is ignored if 'default', otherwise it's appended with an underscore.
140-
3. Overly verbose name are shortened by removing certain prefixes/suffixes.
141-
142-
Examples:
143-
abs.default -> abs
144-
split_copy.Tensor -> split_tensor
145-
"""
146-
ops: set[str] = set()
147-
for edge_name in _ALL_EDGE_OPS:
148-
op, overload = edge_name.split(".")
149-
150-
# Normalize names
151-
op = op.lower()
152-
op = op.removeprefix("_")
153-
op = op.removesuffix("_copy")
154-
op = op.removesuffix("_with_indices")
155-
overload = overload.lower()
156-
157-
if overload == "default":
158-
ops.add(op)
159-
else:
160-
ops.add(f"{op}_{overload}")
135+
def _normalize_op_name(edge_name: str) -> str:
136+
op, overload = edge_name.split(".")
137+
138+
op = op.lower()
139+
op = op.removeprefix("_")
140+
op = op.removesuffix("_copy")
141+
op = op.removesuffix("_with_indices")
161142

162-
return ops
143+
overload = overload.lower()
144+
if overload == "default":
145+
return op
146+
else:
147+
return f"{op}_{overload}"
163148

164149

165150
def _split_model_entry(entry: str) -> tuple[str, str | None, bool]:
@@ -190,7 +175,8 @@ def _camel_to_snake(name: str) -> str:
190175
return _CAMEL_BOUNDARY.sub("_", name).lower()
191176

192177

193-
OP_LIST = sorted(_collect_arm_ops())
# Maps each normalized op name to the edge op name it was derived from. If two
# edge ops normalize to the same name, the one iterated last in _ALL_EDGE_OPS
# is the one kept.
OP_NAME_MAP = {_normalize_op_name(edge_name): edge_name for edge_name in _ALL_EDGE_OPS}
# The keys of OP_NAME_MAP are exactly the set of normalized names, so sort them
# directly instead of normalizing every edge op a second time.
OP_LIST = sorted(OP_NAME_MAP)
194180
PASS_LIST = sorted(
195181
_collect_arm_passes(pathlib.Path("backends/arm/_passes/__init__.py"))
196182
)

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 74 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -6529,70 +6529,55 @@ def test_qwen2_5(self):
65296529

65306530

65316531
class TestExampleMultimodalityScript(TestQNN):
6532-
def test_smolvlm_500m_instruct(self):
6533-
if not self.required_envs():
6534-
self.skipTest("missing required envs")
65356532

6536-
prompt = "Can you describe this image?"
6537-
cmds = [
6538-
"python",
6539-
f"{self.executorch_root}/examples/qualcomm/oss_scripts/llama/llama.py",
6540-
"--artifact",
6541-
self.artifact_dir,
6542-
"--build_folder",
6543-
self.build_folder,
6544-
"--model",
6545-
self.model,
6546-
"--ip",
6547-
self.ip,
6548-
"--port",
6549-
str(self.port),
6550-
"--prompt",
6551-
prompt,
6552-
"--temperature",
6553-
"0",
6554-
"--decoder_model",
6555-
"smolvlm_500m_instruct",
6556-
"--model_mode",
6557-
"kv",
6558-
"--max_seq_len",
6559-
"128",
6560-
]
6561-
if self.compile_only:
6562-
cmds.extend(["--compile_only"])
6563-
elif self.device:
6564-
cmds.extend(["--device", self.device])
6565-
if self.host:
6566-
cmds.extend(["--host", self.host])
6567-
elif self.enable_x86_64:
6568-
cmds.extend(["--enable_x86_64"])
6569-
if self.pre_gen_pte:
6570-
cmds.extend(["--pre_gen_pte", self.pre_gen_pte])
6533+
@dataclass(frozen=True)
6534+
class MLLMSpecs:
6535+
max_seq_len: int
6536+
sm8650_token_rate: float
6537+
sm8750_token_rate: float
6538+
encoder_pte_size: float
6539+
text_embedding_pte_size: float
6540+
decoder_pte_size: float
65716541

6572-
p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
6573-
with Listener((self.ip, self.port)) as listener:
6574-
conn = listener.accept()
6575-
p.communicate()
6576-
msg = json.loads(conn.recv())
6577-
if "Error" in msg:
6578-
self.fail(msg["Error"])
6579-
else:
6580-
if not self.enable_x86_64:
6581-
encoder_pte_size = msg["encoder_pte_size"]
6582-
text_embedding_pte_size = msg["text_embedding_pte_size"]
6583-
decoder_pte_size = msg["pte_size"]
6584-
self.assertLessEqual(encoder_pte_size, 110_000_000) # 110MB
6585-
self.assertLessEqual(text_embedding_pte_size, 100_000_000) # 100MB
6586-
self.assertLessEqual(decoder_pte_size, 400_000_000) # 400MB
6587-
print(f"Encoder PTE Size: {encoder_pte_size} bytes")
6588-
print(f"Text Embedding PTE Size: {text_embedding_pte_size} bytes")
6589-
print(f"Decoder PTE Size: {decoder_pte_size} bytes")
6542+
@dataclass(frozen=True)
6543+
class VLMSpecs(MLLMSpecs):
6544+
image_path: str
6545+
golden_image_feature: str
65906546

6591-
def test_internvl3_1b(self):
6592-
if not self.required_envs():
6547+
# TODO: refactor to support different backends
6548+
def setUp(self):
6549+
self.vlm_specs = {
6550+
"smolvlm_500m_instruct": TestExampleMultimodalityScript.VLMSpecs(
6551+
max_seq_len=128,
6552+
sm8650_token_rate=50,
6553+
sm8750_token_rate=55,
6554+
encoder_pte_size=110_000_000, # 110MB
6555+
text_embedding_pte_size=100_000_000, # 100MB
6556+
decoder_pte_size=400_000_000, # 400MB
6557+
image_path="https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg", # New York Bay
6558+
golden_image_feature="city",
6559+
),
6560+
"internvl3_1b": TestExampleMultimodalityScript.VLMSpecs(
6561+
max_seq_len=320,
6562+
sm8650_token_rate=11,
6563+
sm8750_token_rate=13,
6564+
encoder_pte_size=425_000_000, # 425MB
6565+
text_embedding_pte_size=300_000_000, # 300MB
6566+
decoder_pte_size=550_000_000, # 550 MB
6567+
image_path="http://images.cocodataset.org/val2017/000000039769.jpg", # Two cats lying on a blanket
6568+
golden_image_feature="cats",
6569+
),
6570+
}
6571+
6572+
def test_static_vlm(self):
6573+
if not self.required_envs([self.model_name]):
65936574
self.skipTest("missing required envs")
65946575

6576+
vlm_specs: TestExampleMultimodalityScript.VLMSpecs = self.vlm_specs[
6577+
self.model_name
6578+
]
65956579
prompt = "Can you describe this image?"
6580+
image_path = vlm_specs.image_path
65966581
cmds = [
65976582
"python",
65986583
f"{self.executorch_root}/examples/qualcomm/oss_scripts/llama/llama.py",
@@ -6608,14 +6593,16 @@ def test_internvl3_1b(self):
66086593
str(self.port),
66096594
"--prompt",
66106595
prompt,
6596+
"--image_path",
6597+
image_path,
66116598
"--temperature",
66126599
"0",
66136600
"--decoder_model",
6614-
"internvl3_1b",
6601+
f"{self.model_name}",
66156602
"--model_mode",
66166603
"kv",
66176604
"--max_seq_len",
6618-
"320",
6605+
f"{vlm_specs.max_seq_len}",
66196606
]
66206607
if self.compile_only:
66216608
cmds.extend(["--compile_only"])
@@ -6636,17 +6623,41 @@ def test_internvl3_1b(self):
66366623
if "Error" in msg:
66376624
self.fail(msg["Error"])
66386625
else:
6626+
if not self.compile_only:
6627+
model_out = msg["result"][0]
6628+
self.assertTrue(
6629+
vlm_specs.golden_image_feature in model_out,
6630+
f"Expected Output contains feature: '{vlm_specs.golden_image_feature}' Actual Output: '{model_out}'",
6631+
)
6632+
print(f"Image Path: {image_path}")
6633+
print(f"Query: {prompt}")
6634+
print(f"Answer: {model_out}")
66396635
if not self.enable_x86_64:
66406636
encoder_pte_size = msg["encoder_pte_size"]
66416637
text_embedding_pte_size = msg["text_embedding_pte_size"]
66426638
decoder_pte_size = msg["pte_size"]
6643-
self.assertLessEqual(encoder_pte_size, 425_000_000) # 425MB
6644-
self.assertLessEqual(text_embedding_pte_size, 300_000_000) # 300MB
6645-
self.assertLessEqual(decoder_pte_size, 550_000_000) # 550MB
6639+
self.assertLessEqual(encoder_pte_size, vlm_specs.encoder_pte_size)
6640+
self.assertLessEqual(
6641+
text_embedding_pte_size, vlm_specs.text_embedding_pte_size
6642+
)
6643+
self.assertLessEqual(decoder_pte_size, vlm_specs.decoder_pte_size)
66466644
print(f"Encoder PTE Size: {encoder_pte_size} bytes")
66476645
print(f"Text Embedding PTE Size: {text_embedding_pte_size} bytes")
66486646
print(f"Decoder PTE Size: {decoder_pte_size} bytes")
66496647

6648+
attr_name = f"{self.model.lower()}_token_rate"
6649+
if (
6650+
not self.compile_only
6651+
and not self.enable_x86_64
6652+
and hasattr(vlm_specs, attr_name)
6653+
):
6654+
device_inference_speed = msg["inference_speed"]
6655+
expected_inference_speed = getattr(vlm_specs, attr_name)
6656+
print(f"Prompt Evaluation: {device_inference_speed} tokens/second")
6657+
self.assertGreaterEqual(
6658+
device_inference_speed, expected_inference_speed
6659+
)
6660+
66506661

66516662
class TestExampleOssScript(TestQNN):
66526663
def test_albert(self):

examples/qualcomm/oss_scripts/llama/CMakeLists.txt

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,60 @@ target_compile_options(qnn_llama_runner PUBLIC ${_common_compile_options})
8181
set_target_properties(
8282
qnn_llama_runner PROPERTIES LINK_FLAGS "-Wl,-rpath='$ORIGIN'"
8383
)
84+
# Derive the multimodal runner sources from the llama runner sources: drop the
# entries matching qnn_llama_runner and runner/runner.(cpp|h), then prepend the
# multimodal-specific files.
set(_multimodal_runner__srcs ${_llama_runner__srcs})
list(FILTER _multimodal_runner__srcs EXCLUDE REGEX ".*qnn_llama_runner.*")
# The backslash is doubled so the regex receives `\.` (a literal dot); a single
# `\.` inside a quoted argument is reduced to `.` before the regex is compiled.
list(FILTER _multimodal_runner__srcs EXCLUDE REGEX ".*runner/runner\\.(cpp|h)")
list(
  PREPEND
  _multimodal_runner__srcs
  ${CMAKE_CURRENT_LIST_DIR}/qnn_multimodal_runner.cpp
  ${CMAKE_CURRENT_LIST_DIR}/runner/multimodal_runner/multimodal_runner.cpp
  ${CMAKE_CURRENT_LIST_DIR}/runner/multimodal_runner/multimodal_runner.h
  ${CMAKE_CURRENT_LIST_DIR}/runner/multimodal_runner/encoder.cpp
  ${CMAKE_CURRENT_LIST_DIR}/runner/multimodal_runner/encoder.h
  ${CMAKE_CURRENT_LIST_DIR}/runner/multimodal_runner/embedding_runner.cpp
  ${CMAKE_CURRENT_LIST_DIR}/runner/multimodal_runner/embedding_runner.h
  ${CMAKE_CURRENT_LIST_DIR}/runner/multimodal_runner/embedding_processor.cpp
  ${CMAKE_CURRENT_LIST_DIR}/runner/multimodal_runner/embedding_processor.h
  ${CMAKE_CURRENT_LIST_DIR}/runner/multimodal_runner/multimodal_prompt_processor.cpp
  ${CMAKE_CURRENT_LIST_DIR}/runner/multimodal_runner/multimodal_prompt_processor.h
  ${CMAKE_CURRENT_LIST_DIR}/runner/multimodal_runner/multimodal_token_generator.cpp
  ${CMAKE_CURRENT_LIST_DIR}/runner/multimodal_runner/multimodal_token_generator.h
  ${CMAKE_CURRENT_LIST_DIR}/runner/multimodal_runner/multimodal_lhd_token_generator.cpp
  ${CMAKE_CURRENT_LIST_DIR}/runner/multimodal_runner/multimodal_lhd_token_generator.h
)

# build qnn multimodal runner
add_executable(qnn_multimodal_runner ${_multimodal_runner__srcs})
target_include_directories(
  qnn_multimodal_runner PUBLIC ${_common_include_directories}
)
target_include_directories(
  qnn_multimodal_runner
  PUBLIC ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
)
target_compile_options(qnn_multimodal_runner PUBLIC ${_common_compile_options})

target_link_libraries(
  qnn_multimodal_runner
  qnn_executorch_backend
  executorch_core
  extension_data_loader
  extension_flat_tensor
  extension_llm_runner
  extension_module
  extension_tensor
  gflags
  custom_ops
  quantized_ops_lib
  quantized_kernels
  tokenizers::tokenizers
)

# Let the runner resolve shared libraries located next to the executable.
set_target_properties(
  qnn_multimodal_runner PROPERTIES LINK_FLAGS "-Wl,-rpath='$ORIGIN'"
)

0 commit comments

Comments
 (0)