Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions parsec/interfaces/ptg/ptg-compiler/jdf2c.c
Original file line number Diff line number Diff line change
Expand Up @@ -4497,6 +4497,7 @@ static void jdf_generate_startup_hook( const jdf_t *jdf )
" idx++;\n"
" }\n"
" chores[idx].type = PARSEC_DEV_NONE;\n"
" chores[idx].flags = PARSEC_CHORE_FLAG_NONE;\n"
" chores[idx].evaluate = NULL;\n"
" chores[idx].hook = NULL;\n"
" /* Create the initialization tasks for each taskclass */\n"
Expand Down
21 changes: 20 additions & 1 deletion parsec/mca/device/device_gpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -2498,6 +2498,16 @@ parsec_device_kernel_cleanout( parsec_device_gpu_module_t *gpu_device,
return 0;
}

/**
* Returns true if the task's completion should be shifted to worker threads.
*/
static bool shift_completed_task(parsec_device_gpu_module_t* gpu_device, parsec_gpu_task_t* gpu_task)
{
parsec_task_t* this_task = gpu_task->ec;
const __parsec_chore_t *chore = &this_task->task_class->incarnations[gpu_device->super.device_index];
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is correct: the incarnations are ordered by device type, not by device index. You can find the current chore in this_task->selected_chore, or you can scan the incarnations array looking for a chore with the same device_type as the gpu_device.

return (bool)(chore->flags & PARSEC_CHORE_FLAG_SHIFT_COMPLETION);
}

/**
* This version is based on 4 streams: one for transfers from the memory to
* the GPU, 2 for kernel executions and one for transfers from the GPU into
Expand Down Expand Up @@ -2703,8 +2713,17 @@ parsec_device_kernel_scheduler( parsec_device_module_t *module,
goto remove_gpu_task;
}
parsec_device_kernel_epilog( gpu_device, gpu_task );
__parsec_complete_execution( es, gpu_task->ec );

if (shift_completed_task(gpu_device, gpu_task)) {
// ship the task to other threads to complete its execution
gpu_task->ec->status = PARSEC_TASK_STATUS_COMPLETE;
PARSEC_LIST_ITEM_SINGLETON(gpu_task->ec);
__parsec_schedule(es, gpu_task->ec, 1);
} else {
__parsec_complete_execution( es, gpu_task->ec );
}
gpu_device->super.executed_tasks++;

remove_gpu_task:
PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%d:%s]: gpu_task %p freed",
gpu_device->super.device_index, gpu_device->super.name,
Expand Down
5 changes: 4 additions & 1 deletion parsec/parsec_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -392,9 +392,12 @@ int parsec_update_deps_with_counter_count_task(parsec_taskpool_t *tp,
const parsec_task_t* PARSEC_RESTRICT origin,
const parsec_flow_t* PARSEC_RESTRICT origin_flow,
const parsec_flow_t* PARSEC_RESTRICT dest_flow);


#define PARSEC_CHORE_FLAG_NONE 0x00
#define PARSEC_CHORE_FLAG_SHIFT_COMPLETION 0x01
typedef struct __parsec_internal_incarnation_s {
int32_t type;
int32_t flags;
parsec_evaluate_function_t *evaluate;
parsec_hook_t *hook;
char *dyld;
Expand Down
Loading