Skip to content

Commit 439f1b1

Browse files
aicss-genai and ctao456 authored
sycl: add GGML_SYCL_USE_ASYNC_MEM_OP env toggle (#22153)
* sycl: add GGML_SYCL_USE_ASYNC_MEM_OP env toggle

  Signed-off-by: Chun Tao <chun.tao@intel.com>

* Use async mem ops for correctness when SYCL graphs are explicitly on.

  Signed-off-by: Tao, Chun <chun.tao@intel.com>

---------

Signed-off-by: Chun Tao <chun.tao@intel.com>
Signed-off-by: Tao, Chun <chun.tao@intel.com>
Co-authored-by: Chun Tao <chun.tao@intel.com>
1 parent c3e9ade commit 439f1b1

1 file changed

Lines changed: 7 additions & 4 deletions

File tree

ggml/src/ggml-sycl/ggml-sycl.cpp

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -72,6 +72,7 @@ int g_ggml_sycl_disable_graph = 0;
7272
int g_ggml_sycl_disable_dnn = 0;
7373
int g_ggml_sycl_prioritize_dmmv = 0;
7474
int g_ggml_sycl_use_async_mem_op = 0;
75+
int g_ggml_sycl_use_async_mem_op_requested = 1;
7576
int g_ggml_sycl_enable_level_zero = 0;
7677
int g_ggml_sycl_enable_flash_attention = 1;
7778

@@ -304,6 +305,8 @@ static void ggml_check_sycl() try {
304305
GGML_LOG_INFO(" GGML_SYCL_DISABLE_DNN: DNN disabled by compile flag\n");
305306
#endif
306307
GGML_LOG_INFO(" GGML_SYCL_PRIORITIZE_DMMV: %d\n", g_ggml_sycl_prioritize_dmmv);
308+
g_ggml_sycl_use_async_mem_op_requested = get_sycl_env("GGML_SYCL_USE_ASYNC_MEM_OP", 1);
309+
GGML_LOG_INFO(" GGML_SYCL_USE_ASYNC_MEM_OP: %d\n", g_ggml_sycl_use_async_mem_op_requested);
307310

308311
#ifdef SYCL_FLASH_ATTN
309312
GGML_LOG_INFO(" GGML_SYCL_ENABLE_FLASH_ATTN: %d\n", g_ggml_sycl_enable_flash_attention);
@@ -319,11 +322,11 @@ static void ggml_check_sycl() try {
319322
fprintf(stderr, "%s: SYCL_USE_XMX: no\n", __func__);
320323
#endif
321324
*/
322-
// Currently, we only use async malloc / free when graphs are enabled as it is required for the calls to be
323-
// properly recorded. As this SYCL extension matures it may be beneficial to enable as the default path and in
324-
// other places.
325+
// Async USM allocation/free is also useful outside the graph path: it avoids the host waits in the reorder
326+
// staging path while preserving queue ordering semantics. Graph support still depends on the extension being
327+
// available, but it no longer needs to control the non-graph fast path.
325328
#if defined(GGML_SYCL_GRAPH) && SYCL_EXT_ONEAPI_ASYNC_MEMORY_ALLOC
326-
g_ggml_sycl_use_async_mem_op = !g_ggml_sycl_disable_graph;
329+
g_ggml_sycl_use_async_mem_op = g_ggml_sycl_use_async_mem_op_requested || !g_ggml_sycl_disable_graph;
327330
if (g_ggml_sycl_use_async_mem_op) {
328331
for (unsigned int i = 0; i < dpct::dev_mgr::instance().device_count(); ++i) {
329332
if (!dpct::dev_mgr::instance().get_device(i).has(sycl::aspect::ext_oneapi_async_memory_alloc)) {

0 commit comments

Comments
 (0)