Skip to content

Commit fffc3ec

Browse files
committed
Add support for batched tasks.
The idea is the following: - Task incarnations (a.k.a. BODY) can be marked with the "batch" property, allowing the runtime to provide the task with the entire list of ready tasks of the execution stream instead of just extracting the head. - This list of ready tasks is in fact a ring, which the kernel can then trim and divide into a batch and the rest. The rest of the tasks are left in the ring, while the batch group is submitted for execution. - The kernel also needs to provide a callback in the gpu_task complete_stage, so that the runtime can call the specialized function able to complete all batched tasks. Signed-off-by: George Bosilca <gbosilca@nvidia.com>
1 parent dcba0c0 commit fffc3ec

20 files changed

Lines changed: 221 additions & 66 deletions

CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,8 @@ option(PARSEC_GPU_ALLOC_PER_TILE
181181
mark_as_advanced(PARSEC_GPU_ALLOC_PER_TILE)
182182
option(PARSEC_GPU_WITH_CUDA
183183
"Enable GPU support using CUDA kernels" ON)
184+
option(PARSEC_GPU_WITH_CUDA_BATCH
185+
"Enable the runtime support for batched kernels" ON)
184186
option(PARSEC_GPU_WITH_HIP
185187
"Enable GPU support using HIP kernels" ON)
186188
option(PARSEC_GPU_WITH_LEVEL_ZERO
@@ -729,6 +731,12 @@ int main(int argc, char *argv[]) {
729731
endif (CUDAToolkit_FOUND)
730732
set(PARSEC_HAVE_CU_COMPILER ${CMAKE_CUDA_COMPILER} CACHE BOOL "True if PaRSEC provide support for compiling .cu files")
731733
endif( PARSEC_GPU_WITH_CUDA )
734+
if( PARSEC_GPU_WITH_CUDA_BATCH )
735+
if( NOT PARSEC_HAVE_CUDA)
736+
message(FATAL_ERROR "PARSEC_GPU_WITH_CUDA_BATCH requires PARSEC_GPU_WITH_CUDA. Enable both or none")
737+
endif( NOT PARSEC_HAVE_CUDA)
738+
set(PARSEC_HAVE_CUDA_BATCH True CACHE BOOL "True if support for batched CUDA has been enabled")
739+
endif( PARSEC_GPU_WITH_CUDA_BATCH )
732740

733741
if( PARSEC_GPU_WITH_HIP )
734742
# This is kinda ugly but the PATH and HINTS don't get transmitted to sub-dependents

cmake_modules/PaRSECConfig.cmake.in

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ endif(@PARSEC_DIST_WITH_MPI@)
6565
if(@PARSEC_HAVE_CUDA@)
6666
find_package(CUDAToolkit REQUIRED)
6767
set(PARSEC_HAVE_CUDA TRUE)
68+
69+
if(@PARSEC_HAVE_CUDA_BATCH@)
70+
set(PARSEC_HAVE_CUDA_BATCH TRUE)
71+
endif(@PARSEC_HAVE_CUDA_BATCH@)
6872
endif(@PARSEC_HAVE_CUDA@)
6973

7074
if(@PARSEC_HAVE_HIP@)

parsec/include/parsec/parsec_options.h.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@
130130
#cmakedefine PARSEC_HAVE_DEV_CPU_SUPPORT
131131
#cmakedefine PARSEC_HAVE_DEV_RECURSIVE_SUPPORT
132132
#cmakedefine PARSEC_HAVE_DEV_CUDA_SUPPORT
133+
#cmakedefine PARSEC_HAVE_DEV_CUDA_BATCH_SUPPORT
133134
#cmakedefine PARSEC_HAVE_DEV_HIP_SUPPORT
134135
#cmakedefine PARSEC_HAVE_DEV_LEVEL_ZERO_SUPPORT
135136
#cmakedefine PARSEC_HAVE_DEV_OPENCL_SUPPORT

parsec/interfaces/dtd/insert_function.c

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* Copyright (c) 2013-2023 The University of Tennessee and The University
33
* of Tennessee Research Foundation. All rights
44
* reserved.
5-
* Copyright (c) 2023 NVIDIA Corporation. All rights reserved.
5+
* Copyright (c) 2023-2024 NVIDIA Corporation. All rights reserved.
66
*/
77

88
/* **************************************************************************** */
@@ -1477,7 +1477,7 @@ parsec_dtd_startup(parsec_context_t *context,
14771477
if( !(tp->devices_index_mask & (1 << device->device_index))) continue; /* not supported */
14781478
// If CUDA is enabled, let the CUDA device activated for this
14791479
// taskpool.
1480-
if( PARSEC_DEV_CUDA == device->type ) continue;
1480+
if( PARSEC_DEV_CUDA & device->type ) continue;
14811481
if( NULL != device->taskpool_register )
14821482
if( PARSEC_SUCCESS !=
14831483
device->taskpool_register(device, (parsec_taskpool_t *)tp)) {
@@ -2355,8 +2355,8 @@ int parsec_dtd_task_class_add_chore(parsec_taskpool_t *tp,
23552355
/* We assume that incarnations is big enough, because it has been pre-allocated
23562356
* with PARSEC_DEV_MAX_NB_TYPE+1 chores, as this is a DTD task class */
23572357
incarnations = (__parsec_chore_t*)dtd_tc->super.incarnations;
2358-
for(i = 0; i < PARSEC_DEV_MAX_NB_TYPE && incarnations[i].type != PARSEC_DEV_NONE; i++) {
2359-
if( incarnations[i].type == device_type ) {
2358+
for(i = 0; i < PARSEC_DEV_MAX_NB_TYPE && (incarnations[i].type & PARSEC_DEV_ANY_TYPE) != PARSEC_DEV_NONE; i++) {
2359+
if( incarnations[i].type & PARSEC_DEV_ANY_TYPE & device_type ) {
23602360
parsec_warning("A chore for this device type has already been added to task class '%s'\n",
23612361
tc->name);
23622362
return PARSEC_ERROR;
@@ -2369,7 +2369,7 @@ int parsec_dtd_task_class_add_chore(parsec_taskpool_t *tp,
23692369
}
23702370

23712371
incarnations[i].type = device_type;
2372-
if(PARSEC_DEV_CUDA == device_type) {
2372+
if(PARSEC_DEV_CUDA & device_type) {
23732373
incarnations[i].hook = parsec_dtd_gpu_task_submit;
23742374
dtd_tc->gpu_func_ptr = (parsec_advance_task_function_t)function;
23752375
}
@@ -3258,19 +3258,20 @@ __parsec_dtd_taskpool_create_task(parsec_taskpool_t *tp,
32583258
dtd_tc = parsec_dtd_create_task_classv(name_of_kernel, nb_params, params);
32593259
tc = &dtd_tc->super;
32603260

3261-
__parsec_chore_t **incarnations = (__parsec_chore_t **)&tc->incarnations;
3262-
(*incarnations)[0].type = device_type;
3263-
if( device_type == PARSEC_DEV_CUDA ) {
3261+
__parsec_chore_t *incarnations = (__parsec_chore_t *)tc->incarnations;
3262+
incarnations[0].type = device_type;
3263+
if( device_type & PARSEC_DEV_CUDA ) {
32643264
/* Special case for CUDA: we need an intermediate */
3265-
(*incarnations)[0].hook = parsec_dtd_gpu_task_submit;
3265+
incarnations[0].hook = parsec_dtd_gpu_task_submit;
32663266
dtd_tc->gpu_func_ptr = (parsec_advance_task_function_t)fpointer;
32673267
}
32683268
else {
32693269
/* Default case: the user-provided function is directly the hook to call */
3270-
(*incarnations)[0].hook = fpointer; // We can directly call the CPU hook
3270+
incarnations[0].hook = fpointer; // We can directly call the CPU hook
32713271
dtd_tc->cpu_func_ptr = fpointer;
32723272
}
3273-
(*incarnations)[1].type = PARSEC_DEV_NONE;
3273+
incarnations[1].type = PARSEC_DEV_NONE;
3274+
incarnations[1].hook = NULL;
32743275

32753276
/* Bookkeeping of the task class */
32763277
parsec_dtd_register_task_class(&dtd_tp->super, fkey, tc);

parsec/interfaces/ptg/ptg-compiler/jdf2c.c

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
* Copyright (c) 2009-2023 The University of Tennessee and The University
33
* of Tennessee Research Foundation. All rights
44
* reserved.
5+
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
56
*/
67

78
#include "parsec/parsec_config.h"
@@ -3938,25 +3939,40 @@ jdf_generate_function_incarnation_list( const jdf_t *jdf,
39383939
jdf_def_list_t* dyld_property;
39393940
jdf_def_list_t* evaluate_property = NULL;
39403941
jdf_def_list_t* device_property = NULL;
3942+
jdf_def_list_t* batch_property = NULL;
39413943

39423944
(void)jdf;
39433945
string_arena_add_string(sa, "static const __parsec_chore_t __%s_chores[] ={\n", base_name);
39443946
do {
39453947
jdf_find_property(body->properties, "type", &type_property);
39463948
jdf_find_property(body->properties, "dyld", &dyld_property);
39473949
jdf_find_property(body->properties, JDF_BODY_PROP_EVALUATE, &evaluate_property);
3948-
if( NULL == type_property) {
3950+
jdf_find_property(body->properties, "batch", &batch_property);
3951+
if (NULL == type_property)
3952+
{
39493953
string_arena_add_string(sa, "#if defined(PARSEC_HAVE_DEV_CPU_SUPPORT)\n");
39503954
string_arena_add_string(sa, " { .type = PARSEC_DEV_CPU,\n");
39513955
string_arena_add_string(sa, " .evaluate = (parsec_evaluate_function_t*)%s,\n",
39523956
(NULL == evaluate_property) ? "NULL" : evaluate_property->expr->jdf_c_code.fname);
39533957
string_arena_add_string(sa, " .hook = (parsec_hook_t*)hook_of_%s },\n", base_name);
39543958
string_arena_add_string(sa, "#endif /* defined(PARSEC_HAVE_DEV_CPU_SUPPORT) */\n");
3955-
} else {
3959+
if( NULL != batch_property ) {
3960+
fprintf(stderr,
3961+
"Error: batched property (%s) not allowed for devices other than accelerators in body of task %s at line %d\n",
3962+
batch_property->expr->jdf_var, f->fname, JDF_OBJECT_LINENO(body));
3963+
assert( NULL != batch_property );
3964+
}
3965+
}
3966+
else
3967+
{
39563968
char* dev_upper = strdup_upper(type_property->expr->jdf_var);
39573969

39583970
string_arena_add_string(sa, "#if defined(PARSEC_HAVE_DEV_%s_SUPPORT)\n", dev_upper);
3959-
string_arena_add_string(sa, " { .type = PARSEC_DEV_%s,\n", dev_upper);
3971+
string_arena_add_string(sa, " { .type = PARSEC_DEV_%s", dev_upper);
3972+
if( NULL != batch_property) {
3973+
string_arena_add_string(sa, " | PARSEC_DEV_CHORE_ALLOW_BATCH");
3974+
}
3975+
string_arena_add_string(sa, ",\n");
39603976
if( NULL == dyld_property ) {
39613977
string_arena_add_string(sa, " .dyld = NULL,\n");
39623978
} else {
@@ -4491,7 +4507,7 @@ static void jdf_generate_startup_hook( const jdf_t *jdf )
44914507
" parsec_task_class_t* tc = (parsec_task_class_t*)__parsec_tp->super.super.task_classes_array[i];\n"
44924508
" __parsec_chore_t* chores = (__parsec_chore_t*)tc->incarnations;\n"
44934509
" uint32_t idx = 0, j;\n"
4494-
" for( j = 0; PARSEC_DEV_NONE != chores[j].type; j++ ) {\n"
4510+
" for( j = 0; PARSEC_DEV_NONE != (chores[j].type & PARSEC_DEV_ANY_TYPE); j++ ) {\n"
44954511
" if( !(supported_dev & chores[j].type) ) continue;\n"
44964512
" if( j != idx ) {\n"
44974513
" chores[idx] = chores[j];\n"
@@ -4680,7 +4696,7 @@ static void jdf_generate_constructor( const jdf_t* jdf )
46804696
coutput(" for( i = 0; i < __parsec_tp->super.super.nb_task_classes; i++ ) {\n"
46814697
" __parsec_tp->super.super.task_classes_array[i] = tc = malloc(sizeof(parsec_task_class_t));\n"
46824698
" memcpy(tc, %s_task_classes[i], sizeof(parsec_task_class_t));\n"
4683-
" for( j = 0; PARSEC_DEV_NONE != tc->incarnations[j].type; j++); /* compute the number of incarnations */\n"
4699+
" for( j = 0; PARSEC_DEV_NONE != (tc->incarnations[j].type & PARSEC_DEV_ANY_TYPE); j++); /* compute the number of incarnations */\n"
46844700
" tc->incarnations = (__parsec_chore_t*)malloc((j+1) * sizeof(__parsec_chore_t));\n "
46854701
" memcpy((__parsec_chore_t*)tc->incarnations, %s_task_classes[i]->incarnations, (j+1) * sizeof(__parsec_chore_t));\n\n"
46864702
" /* Add a placeholder for initialization and startup task */\n"
@@ -6731,8 +6747,8 @@ static void jdf_generate_code_hook_gpu(const jdf_t *jdf,
67316747
coutput(" /* Pointer to dynamic gpu function */\n"
67326748
" {\n"
67336749
" int chore_idx = 0;\n"
6734-
" for ( ; PARSEC_DEV_NONE != this_task->task_class->incarnations[chore_idx].type; ++chore_idx) {\n"
6735-
" if (this_task->task_class->incarnations[chore_idx].type == PARSEC_DEV_%s) break;\n"
6750+
" for ( ; PARSEC_DEV_NONE != (this_task->task_class->incarnations[chore_idx].type & PARSEC_DEV_ANY_TYPE); ++chore_idx) {\n"
6751+
" if (this_task->task_class->incarnations[chore_idx].type & PARSEC_DEV_%s) break;\n"
67366752
" }\n"
67376753
" /* The void* cast prevents the compiler from complaining about the type change */\n"
67386754
" parsec_body.dyld_fn = (%s)(void*)this_task->task_class->incarnations[chore_idx].dyld_fn;\n"
@@ -6983,7 +6999,7 @@ static void jdf_generate_code_hook(const jdf_t *jdf,
69836999
coutput("#if defined(PARSEC_HAVE_DEV_%s_SUPPORT)\n", type_upper);
69847000
if( NULL != type_property) {
69857001

6986-
if (!strcasecmp(type_property->expr->jdf_var, "cuda")
7002+
if (!strncasecmp(type_property->expr->jdf_var, "cuda", 4) /* for batched */
69877003
|| !strcasecmp(type_property->expr->jdf_var, "hip")) {
69887004
jdf_generate_code_hook_gpu(jdf, f, body, name);
69897005
goto hook_end_block;

parsec/mca/device/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ set(PARSEC_HAVE_DEV_RECURSIVE_SUPPORT 0 CACHE BOOL "PaRSEC has support for Recu
1414
if(PARSEC_HAVE_CUDA)
1515
set(PARSEC_HAVE_DEV_CUDA_SUPPORT 1 CACHE BOOL "PaRSEC support for CUDA")
1616
endif(PARSEC_HAVE_CUDA)
17+
if(PARSEC_HAVE_CUDA_BATCH)
18+
set(PARSEC_HAVE_DEV_CUDA_BATCH_SUPPORT 1 CACHE BOOL "PaRSEC support for batched CUDA")
19+
endif(PARSEC_HAVE_CUDA_BATCH)
1720
if(PARSEC_HAVE_HIP)
1821
set(PARSEC_HAVE_DEV_HIP_SUPPORT 1 CACHE BOOL "PaRSEC support for HIP")
1922
endif(PARSEC_HAVE_HIP)

parsec/mca/device/cuda/device_cuda_component.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ static int device_cuda_component_close(void)
276276
/* Check that no CUDA devices are still registered with PaRSEC */
277277
for(i = 0; i < parsec_mca_device_enabled(); i++) {
278278
if( NULL == (cdev = (parsec_device_cuda_module_t*)parsec_mca_device_get(i)) ) continue;
279-
if(PARSEC_DEV_CUDA != cdev->super.super.type) continue;
279+
if(PARSEC_DEV_CUDA & cdev->super.super.type) continue;
280280

281281
PARSEC_DEBUG_VERBOSE(0, parsec_gpu_output_stream,
282282
"GPU[%d:%s] CUDA device %d still registered with PaRSEC at the end of CUDA finalize.\n"

parsec/mca/device/device.c

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
* Copyright (c) 2013-2023 The University of Tennessee and The University
44
* of Tennessee Research Foundation. All rights
55
* reserved.
6+
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
67
*/
78

89
#include "parsec/parsec_config.h"
@@ -107,7 +108,7 @@ int parsec_select_best_device( parsec_task_t* this_task ) {
107108

108109
/* Run the evaluates for the incarnation types to determine if they can
109110
* execute this task */
110-
for(chore_id = 0; PARSEC_DEV_NONE != tc->incarnations[chore_id].type; chore_id++) {
111+
for(chore_id = 0; PARSEC_DEV_NONE != (tc->incarnations[chore_id].type & PARSEC_DEV_ANY_TYPE); chore_id++) {
111112
if( 0 == (this_task->chore_mask & (1<<chore_id)) ) continue;
112113
if( NULL == tc->incarnations[chore_id].hook ) continue; /* dyld hook not found during initialization */
113114

@@ -116,15 +117,15 @@ int parsec_select_best_device( parsec_task_t* this_task ) {
116117
if( PARSEC_HOOK_RETURN_DONE != rc ) {
117118
if( PARSEC_HOOK_RETURN_NEXT != rc ) {
118119
PARSEC_DEBUG_VERBOSE(5, parsec_device_output, "Failed to evaluate %s[%d] chore %d",
119-
tmp, tc->incarnations[chore_id].type,
120+
tmp, tc->incarnations[chore_id].type & PARSEC_DEV_ANY_TYPE,
120121
chore_id);
121122
}
122123
/* Mark this chore as tested */
123124
this_task->chore_mask &= ~( 1<<chore_id );
124125
continue;
125126
}
126127
}
127-
valid_types |= tc->incarnations[chore_id].type; /* the eval accepted the type, but no device specified yet */
128+
valid_types |= (tc->incarnations[chore_id].type & PARSEC_DEV_ANY_TYPE); /* the eval accepted the type, but no device specified yet */
128129
/* Evaluate may have picked a device, abide by it */
129130
if( NULL != this_task->selected_device ) {
130131
assert( this_task->selected_device->type & valid_types );
@@ -140,7 +141,7 @@ int parsec_select_best_device( parsec_task_t* this_task ) {
140141
if (PARSEC_DEV_CPU == valid_types) { /* shortcut for CPU only tasks */
141142
this_task->selected_device = dev = parsec_mca_device_get(0);
142143
this_task->load = 0;
143-
for(chore_id = 0; tc->incarnations[chore_id].type != PARSEC_DEV_CPU; chore_id++);
144+
for(chore_id = 0; !(tc->incarnations[chore_id].type & PARSEC_DEV_CPU); chore_id++);
144145
this_task->selected_chore = chore_id;
145146
PARSEC_DEBUG_VERBOSE(80, parsec_device_output, "%s: Task %s cpu-only task set selected_device %d:%s",
146147
__func__, tmp, dev->device_index, dev->name);
@@ -226,7 +227,7 @@ int parsec_select_best_device( parsec_task_t* this_task ) {
226227
/* Skip the device if no incarnations for its type */
227228
if(!(dev->type & valid_types)) continue;
228229
/* Skip recursive devices: time estimates are computed on the associated CPU device */
229-
if(dev->type == PARSEC_DEV_RECURSIVE) continue;
230+
if(dev->type & PARSEC_DEV_RECURSIVE) continue;
230231

231232
eta = dev->device_load + time_estimate(this_task, dev);
232233
if( best_eta > eta ) {
@@ -244,14 +245,14 @@ int parsec_select_best_device( parsec_task_t* this_task ) {
244245
goto no_valid_device;
245246

246247
this_task->selected_device = parsec_mca_device_get(best_index);
247-
assert( this_task->selected_device->type != PARSEC_DEV_RECURSIVE );
248+
assert( !(this_task->selected_device->type & PARSEC_DEV_RECURSIVE) );
248249
}
249250

250251
device_selected:
251252
dev = this_task->selected_device;
252253
assert( NULL != dev );
253254
assert( tp->devices_index_mask & (1 << dev->device_index) );
254-
for(chore_id = 0; tc->incarnations[chore_id].type != dev->type; chore_id++)
255+
for(chore_id = 0; !(tc->incarnations[chore_id].type & dev->type); chore_id++)
255256
assert(PARSEC_DEV_NONE != tc->incarnations[chore_id].type /* we have selected this device, so there *must* be an incarnation that matches */);
256257
this_task->selected_chore = chore_id;
257258
this_task->load = time_estimate(this_task, dev);
@@ -748,8 +749,8 @@ int parsec_mca_device_registration_complete(parsec_context_t* context)
748749
for( uint32_t i = 0; i < parsec_nb_devices; i++ ) {
749750
parsec_device_module_t* device = parsec_devices[i];
750751
if( NULL == device ) continue;
751-
if( PARSEC_DEV_RECURSIVE == device->type ) continue;
752-
if( PARSEC_DEV_CPU == device->type ) {
752+
if( PARSEC_DEV_RECURSIVE & device->type ) continue;
753+
if( PARSEC_DEV_CPU & device->type ) {
753754
c = 0;
754755
for(int p = 0; p < context->nb_vp; p++)
755756
c += context->virtual_processes[p]->nb_cores;
@@ -768,7 +769,7 @@ int parsec_mca_device_registration_complete(parsec_context_t* context)
768769
for( uint32_t i = 0; i < parsec_nb_devices; i++ ) {
769770
parsec_device_module_t* device = parsec_devices[i];
770771
if( NULL == device ) continue;
771-
if( PARSEC_DEV_RECURSIVE == device->type ) continue;
772+
if( PARSEC_DEV_RECURSIVE & device->type ) continue;
772773
device->time_estimate_default = total_gflops_fp64/(double)device->gflops_fp64;
773774
parsec_debug_verbose(6, parsec_device_output, " Dev[%d] default-time-estimate %-4"PRId64" <- double %-8"PRId64" single %-8"PRId64" tensor %-8"PRId64" half %-8"PRId64" %s",
774775
i, device->time_estimate_default, device->gflops_fp64, device->gflops_fp32, device->gflops_tf32, device->gflops_fp16, device->gflops_guess? "GUESSED": "");
@@ -933,7 +934,7 @@ device_taskpool_register_static(parsec_device_module_t* device, parsec_taskpool_
933934
continue;
934935
__parsec_chore_t* chores = (__parsec_chore_t*)tc->incarnations;
935936
for( j = 0; NULL != chores[j].hook; j++ ) {
936-
if( chores[j].type != device->type )
937+
if( !(chores[j].type & device->type) )
937938
continue;
938939
if( NULL != chores[j].dyld_fn ) {
939940
continue; /* the function has been set for another device of the same type */

parsec/mca/device/device.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
* Copyright (c) 2013-2023 The University of Tennessee and The University
33
* of Tennessee Research Foundation. All rights
44
* reserved.
5+
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
56
*/
67

78
/** @addtogroup parsec_device
@@ -65,10 +66,13 @@ typedef struct parsec_device_base_component_2_0_0 parsec_device_base_component_t
6566
#define PARSEC_DEV_CUDA ((uint8_t)(1 << 2))
6667
#define PARSEC_DEV_HIP ((uint8_t)(1 << 3))
6768
#define PARSEC_DEV_LEVEL_ZERO ((uint8_t)(1 << 4))
69+
#define PARSEC_DEV_CUDA_BATCH ((uint8_t)(1 << 5))
6870
#define PARSEC_DEV_TEMPLATE ((uint8_t)(1 << 7))
6971
#define PARSEC_DEV_ANY_TYPE ((uint8_t) 0x3f)
7072
#define PARSEC_DEV_ALL ((uint8_t) 0x3f)
7173
#define PARSEC_DEV_MAX_NB_TYPE (7)
74+
/* The following flags are extensions to the device type */
75+
#define PARSEC_DEV_CHORE_ALLOW_BATCH ((uint32_t)0x00000100)
7276

7377
#define PARSEC_DEV_GPU_MASK (PARSEC_DEV_CUDA|PARSEC_DEV_HIP|PARSEC_DEV_LEVEL_ZERO)
7478
#define PARSEC_DEV_IS_GPU(t) (0 != ((t) & PARSEC_DEV_GPU_MASK))

0 commit comments

Comments
 (0)