Skip to content

Commit cd45d6a

Browse files
authored
Merge branch 'polycam' into main
2 parents 476a7ef + 5acac1e commit cd45d6a

8 files changed

Lines changed: 88 additions & 8 deletions

File tree

backends/vulkan/cmake/ShaderLibrary.cmake

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,14 @@ function(gen_vulkan_shader_lib_cpp shaders_path)
6060
)
6161
endif()
6262

63+
# Allow overriding GLSL PRECISION for fp16 shader variants. Empty / unset
64+
# keeps upstream default (`highp`). Accepted values: highp, mediump, lowp.
65+
if(EXECUTORCH_VULKAN_FP16_PRECISION)
66+
list(APPEND GEN_SPV_ARGS "--fp16-precision"
67+
"${EXECUTORCH_VULKAN_FP16_PRECISION}"
68+
)
69+
endif()
70+
6371
# Ninja cannot expand wildcards (*) in DEPENDS lists.
6472
file(GLOB VULKAN_SHADERS "${shaders_path}/*.glsl" "${shaders_path}/*.glslh"
6573
"${shaders_path}/*.yaml" "${shaders_path}/*.h"

backends/vulkan/runtime/gen_vulkan_spv.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -663,6 +663,7 @@ def __init__(
663663
glslc_path: Optional[str],
664664
glslc_flags: str = "",
665665
replace_u16vecn: bool = False,
666+
fp16_precision: str = "highp",
666667
) -> None:
667668
if isinstance(src_dir_paths, str):
668669
self.src_dir_paths = [src_dir_paths]
@@ -678,6 +679,7 @@ def __init__(
678679
if "-Os" in self.glslc_flags_no_opt:
679680
self.glslc_flags_no_opt.remove("-Os")
680681
self.replace_u16vecn = replace_u16vecn
682+
self.fp16_precision = fp16_precision
681683

682684
self.src_files: Dict[str, str] = {}
683685
self.template_yaml_files: List[str] = []
@@ -857,6 +859,17 @@ def create_shader_params(
857859
for key, value in variant_params.items():
858860
shader_params[key] = value
859861

862+
# Optionally override PRECISION for half-precision variants. GLSL
863+
# `mediump` is a hint that the driver may use fp16 ALUs for arithmetic.
864+
# On Mali GPUs it's typically honored; on Adreno it's typically
865+
# ignored (harmless). Default is `highp` to match upstream behavior.
866+
if (
867+
self.fp16_precision != "highp"
868+
and shader_params.get("DTYPE") == "half"
869+
and shader_params.get("PRECISION") == "highp"
870+
):
871+
shader_params["PRECISION"] = self.fp16_precision
872+
860873
return shader_params
861874

862875
def constructOutputMap(self) -> None:
@@ -1488,6 +1501,16 @@ def main(argv: List[str]) -> int:
14881501
default=-1,
14891502
help="Number of threads for shader compilation. -1 (default) uses all available CPU cores, 1 uses sequential compilation.",
14901503
)
1504+
parser.add_argument(
1505+
"--fp16-precision",
1506+
choices=["highp", "mediump", "lowp"],
1507+
default="highp",
1508+
help=(
1509+
"GLSL PRECISION qualifier for DTYPE=half shader variants. "
1510+
"`mediump` lets drivers (notably Mali) use fp16 ALUs for arithmetic. "
1511+
"Default `highp` matches upstream behavior. Ignored on fp32 variants."
1512+
),
1513+
)
14911514
options = parser.parse_args()
14921515

14931516
env = DEFAULT_ENV
@@ -1520,6 +1543,7 @@ def main(argv: List[str]) -> int:
15201543
options.glslc_path,
15211544
glslc_flags=glslc_flags_str,
15221545
replace_u16vecn=options.replace_u16vecn,
1546+
fp16_precision=options.fp16_precision,
15231547
)
15241548
output_spv_files = shader_generator.generateSPV(
15251549
options.output_path,

extension/android/executorch_android/src/main/java/org/pytorch/executorch/Module.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,28 @@ public boolean etdump() {
265265
@DoNotStrip
266266
private native boolean etdumpNative();
267267

268+
/**
269+
* Dump the ExecuTorch ETDump file to {@code outputPath}.
270+
*
271+
* @param outputPath absolute path to write the etdump file to.
272+
* @return true if the etdump was successfully written, false otherwise.
273+
*/
274+
@Experimental
275+
public boolean etdump(String outputPath) {
276+
mLock.lock();
277+
try {
278+
if (!mHybridData.isValid()) {
279+
throw new IllegalStateException("Module has been destroyed");
280+
}
281+
return etdumpToNative(outputPath);
282+
} finally {
283+
mLock.unlock();
284+
}
285+
}
286+
287+
@DoNotStrip
288+
private native boolean etdumpToNative(String outputPath);
289+
268290
/**
269291
* Explicitly destroys the native Module object. Calling this method is not required, as the
270292
* native object will be destroyed when this object is garbage-collected. However, the timing of

extension/android/jni/jni_layer.cpp

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -472,36 +472,51 @@ class ExecuTorchJni : public facebook::jni::HybridClass<ExecuTorchJni> {
472472
}
473473

474474
/// Dumps the ETDump to the fixed default location by forwarding to
/// etdump_to_path(); the result of that call is returned unchanged.
jboolean etdump() {
  const char* const default_path = "/data/local/tmp/result.etdump";
  return etdump_to_path(default_path);
}
477+
478+
/// JNI entry point that dumps the ETDump to a caller-supplied path.
///
/// @param outputPath Java string holding the destination file path.
/// @return the result of etdump_to_path(), or false when `outputPath` is a
///     null reference.
jboolean etdumpTo(facebook::jni::alias_ref<jstring> outputPath) {
  // A Java `null` argument arrives as a null reference; converting it to a
  // std::string would dereference null, so reject it up front.
  if (outputPath.get() == nullptr) {
    ET_LOG(Error, "ETDump output path is null");
    return false;
  }
  // Hold the converted string in a named local so the pointer handed to
  // etdump_to_path() is backed by storage that outlives the call.
  const auto path = outputPath->toStdString();
  return etdump_to_path(path.c_str());
}
481+
482+
private:
483+
/// Writes the ETDump data held by the module's event tracer to `path`.
///
/// Only does work when compiled with EXECUTORCH_ANDROID_PROFILING; otherwise
/// `path` is ignored and false is returned. The destination is opened with
/// O_CREAT | O_TRUNC, so an existing file is overwritten. The buffer obtained
/// from get_etdump_data() is released with free() on every path that enters
/// the write branch.
///
/// @param path destination file path, passed directly to open().
/// @return true when every byte of the dump was written, false otherwise.
jboolean etdump_to_path(const char* path) {
#ifdef EXECUTORCH_ANDROID_PROFILING
  auto* etdumpgen =
      static_cast<executorch::etdump::ETDumpGen*>(module_->event_tracer());
  // NOTE(review): presumably the tracer is always set in profiling builds;
  // guard anyway so a missing tracer reports failure instead of crashing.
  if (etdumpgen == nullptr) {
    ET_LOG(Error, "No ETDump data available!");
    return false;
  }
  auto etdump_data = etdumpgen->get_etdump_data();

  if (etdump_data.buf != nullptr && etdump_data.size > 0) {
    int etdump_file = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (etdump_file == -1) {
      ET_LOG(Error, "Cannot create %s error: %d", path, errno);
      free(etdump_data.buf);
      return false;
    }

    // write() may accept fewer bytes than requested, so keep going until the
    // whole buffer has been handed to the kernel.
    const uint8_t* cursor = static_cast<const uint8_t*>(etdump_data.buf);
    size_t remaining = etdump_data.size;
    while (remaining > 0) {
      ssize_t bytes_written = write(etdump_file, cursor, remaining);
      if (bytes_written < 0 && errno == EINTR) {
        // Interrupted before any data was transferred; retry.
        continue;
      }
      if (bytes_written <= 0) {
        ET_LOG(Error, "Cannot write %s error: %d", path, errno);
        close(etdump_file);
        free(etdump_data.buf);
        return false;
      }
      cursor += bytes_written;
      remaining -= static_cast<size_t>(bytes_written);
    }

    ET_LOG(Info, "ETDump written %zu bytes to %s.", etdump_data.size, path);
    close(etdump_file);
    free(etdump_data.buf);
    return true;
  } else {
    ET_LOG(Error, "No ETDump data available!");
  }
#else
  (void)path;
#endif
  return false;
}
504517

518+
public:
519+
505520
facebook::jni::local_ref<facebook::jni::JArrayClass<jstring>> getMethods() {
506521
const auto& names_result = module_->method_names();
507522
if (!names_result.ok()) {
@@ -571,6 +586,7 @@ class ExecuTorchJni : public facebook::jni::HybridClass<ExecuTorchJni> {
571586
makeNativeMethod(
572587
"readLogBufferStaticNative", ExecuTorchJni::readLogBufferStatic),
573588
makeNativeMethod("etdumpNative", ExecuTorchJni::etdump),
589+
makeNativeMethod("etdumpToNative", ExecuTorchJni::etdumpTo),
574590
makeNativeMethod("getMethodsNative", ExecuTorchJni::getMethods),
575591
makeNativeMethod("getUsedBackends", ExecuTorchJni::getUsedBackends),
576592
});

scripts/build_android_library.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ build_android_native_library() {
4040
--preset "android-${ANDROID_ABI}" \
4141
-DANDROID_PLATFORM=android-26 \
4242
-DEXECUTORCH_ENABLE_EVENT_TRACER="${EXECUTORCH_ANDROID_PROFILING:-OFF}" \
43+
-DEXECUTORCH_ANDROID_PROFILING="${EXECUTORCH_ANDROID_PROFILING:-OFF}" \
4344
-DEXECUTORCH_BUILD_EXTENSION_LLM="${EXECUTORCH_BUILD_EXTENSION_LLM:-ON}" \
4445
-DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER="${EXECUTORCH_BUILD_EXTENSION_LLM:-ON}" \
4546
-DEXECUTORCH_BUILD_EXTENSION_ASR_RUNNER="${EXECUTORCH_BUILD_EXTENSION_LLM:-ON}" \
@@ -50,7 +51,9 @@ build_android_native_library() {
5051
-DEXECUTORCH_BUILD_QNN="${EXECUTORCH_BUILD_QNN}" \
5152
-DQNN_SDK_ROOT="${QNN_SDK_ROOT}" \
5253
-DEXECUTORCH_BUILD_VULKAN="${EXECUTORCH_BUILD_VULKAN}" \
54+
-DEXECUTORCH_VULKAN_FP16_PRECISION="${EXECUTORCH_VULKAN_FP16_PRECISION:-highp}" \
5355
-DXNNPACK_ENABLE_ARM_SME2="${XNNPACK_ENABLE_ARM_SME2}" \
56+
-DFLATCC_ALLOW_WERROR=OFF \
5457
-DSUPPORT_REGEX_LOOKAHEAD=ON \
5558
-DCMAKE_BUILD_TYPE="${EXECUTORCH_CMAKE_BUILD_TYPE}" \
5659
-B"${CMAKE_OUT}"

third-party/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,11 +95,12 @@ ExternalProject_Add(
9595
PREFIX ${CMAKE_CURRENT_BINARY_DIR}/flatcc_ep
9696
SOURCE_DIR ${PROJECT_SOURCE_DIR}/third-party/flatcc
9797
BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/flatcc_ep/src/build
98-
CMAKE_ARGS
98+
CMAKE_ARGS
9999
-DFLATCC_RTONLY=OFF
100100
-DFLATCC_TEST=OFF
101101
-DFLATCC_REFLECTION=OFF
102102
-DFLATCC_DEBUG_CLANG_SANITIZE=OFF
103+
-DFLATCC_ALLOW_WERROR=OFF
103104
-DFLATCC_INSTALL=ON
104105
-DCMAKE_POLICY_VERSION_MINIMUM=3.5
105106
-DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR>

tools/cmake/common/preset.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,12 +82,12 @@ macro(define_overridable_option NAME DESCRIPTION VALUE_TYPE DEFAULT_VALUE)
8282
if(DEFINED ${NAME} AND NOT DEFINED CACHE{${NAME}})
8383
set(${NAME}
8484
${${NAME}}
85-
CACHE ${VALUE_TYPE} ${DESCRIPTION} FORCE
85+
CACHE ${VALUE_TYPE} "${DESCRIPTION}" FORCE
8686
)
8787
else()
8888
set(${NAME}
8989
${DEFAULT_VALUE}
90-
CACHE ${VALUE_TYPE} ${DESCRIPTION}
90+
CACHE ${VALUE_TYPE} "${DESCRIPTION}"
9191
)
9292
endif()
9393

tools/cmake/preset/default.cmake

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,12 @@ define_overridable_option(
168168
define_overridable_option(
169169
EXECUTORCH_BUILD_VULKAN "Build the Vulkan backend" BOOL OFF
170170
)
171+
# Precision qualifier used for half-precision Vulkan shader variants
# (string option, defaults to highp).
define_overridable_option(
  EXECUTORCH_VULKAN_FP16_PRECISION
  "GLSL PRECISION for Vulkan half-precision shader variants. Accepted values: highp, mediump, lowp. `mediump` lets Mali drivers use fp16 ALUs; ignored on Adreno. Default `highp` matches upstream."
  STRING highp
)
171177
define_overridable_option(
172178
EXECUTORCH_BUILD_PORTABLE_OPS "Build portable_ops library" BOOL ON
173179
)

0 commit comments

Comments
 (0)