@@ -410,15 +410,93 @@ ACL_EXPORT
410410// CL_API_ENTRY cl_int clEnqueueReadGlobalVariableINTEL() {
411411CL_API_ENTRY cl_int clEnqueueReadGlobalVariableINTEL (
412412 cl_command_queue command_queue, cl_program program, const char *name,
413- cl_bool blocking_write , size_t size, size_t offset, void *ptr,
413+ cl_bool blocking_read , size_t size, size_t offset, void *ptr,
414414 cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
415415 cl_event *event) {
416+ cl_int status;
416417
417- // TODO: get dev_global_ptr from autodiscovery instead later
418- // return 0;
419- return clEnqueueWriteGlobalVariableINTEL (
420- command_queue, program, name, blocking_write, size, offset, ptr,
421- num_events_in_wait_list, event_wait_list, event);
418+ cl_kernel kernel = clCreateKernelIntelFPGA (program, name, &status);
419+ if (status != CL_SUCCESS ) {
420+ return status;
421+ }
422+
423+ // dev_addr_t dev_global_address =
424+ // kernel->dev_bin->get_devdef().autodiscovery_def.?
425+ uintptr_t dev_global_address = 0x4000000 ;
426+ void *dev_global_ptr =
427+ (void *)(dev_global_address + offset * 8 ); // 1 unit of offset is 8 bits
428+ status = set_kernel_arg_mem_pointer_without_checks (kernel, 0 , dev_global_ptr);
429+ // status = clSetKernelArgMemPointerINTEL(kernel, 1, dev_global_ptr);
430+ if (status != CL_SUCCESS ) {
431+ return status;
432+ }
433+
434+ // Copy device global memory to temporary device usm pointer first
435+ void *tmp_dev_ptr = clDeviceMemAllocINTEL (
436+ command_queue->context , command_queue->device , NULL , size, 1 , &status);
437+ if (status != CL_SUCCESS ) {
438+ return status;
439+ }
440+ if (!tmp_dev_ptr) {
441+ return CL_MEM_OBJECT_ALLOCATION_FAILURE ;
442+ }
443+
444+ status = clSetKernelArgMemPointerINTEL (kernel, 1 , tmp_dev_ptr);
445+ if (status != CL_SUCCESS ) {
446+ return status;
447+ }
448+
449+ // Set size kernel arg
450+ status = clSetKernelArg (kernel, 2 , sizeof (size_t ), (const void *)(&size));
451+ if (status != CL_SUCCESS ) {
452+ return status;
453+ }
454+
455+ cl_event tmp_event = 0 ;
456+ status = clEnqueueTask (command_queue, kernel, num_events_in_wait_list,
457+ event_wait_list, &tmp_event);
458+ if (status != CL_SUCCESS ) {
459+ return status;
460+ }
461+ std::cerr << tmp_event->cmd .info .ndrange_kernel .invocation_wrapper ->image
462+ ->activation_id
463+ << std::endl;
464+
465+ // copy from the temporary device memory into user provided pointer
466+ std::cerr << " read: copy from tmp dev pointer to source pointer" << std::endl;
467+ status = clEnqueueMemcpyINTEL (command_queue, blocking_read, ptr, tmp_dev_ptr,
468+ size, 1 , &tmp_event, event);
469+ if (status != CL_SUCCESS ) {
470+ return status;
471+ }
472+
473+ if (blocking_read) {
474+ status = clReleaseEvent (tmp_event);
475+ if (status != CL_SUCCESS ) {
476+ return status;
477+ }
478+ status = clMemFreeINTEL (command_queue->context , tmp_dev_ptr);
479+ if (status != CL_SUCCESS ) {
480+ return status;
481+ }
482+ status = clReleaseKernel (kernel);
483+ if (status != CL_SUCCESS ) {
484+ return status;
485+ }
486+ } else {
487+ // Clean up resources after event finishes
488+ void **callback_data = (void **)acl_malloc (sizeof (void *) * 3 );
489+ if (!callback_data) {
490+ return CL_OUT_OF_HOST_MEMORY ;
491+ }
492+ callback_data[0 ] = (void *)(tmp_dev_ptr);
493+ callback_data[1 ] = (void *)(kernel);
494+ callback_data[2 ] = (void *)(tmp_event);
495+ clSetEventCallback (*event, CL_COMPLETE , acl_dev_global_cleanup,
496+ (void *)callback_data);
497+ }
498+
499+ return CL_SUCCESS ;
422500}
423501
424502ACL_EXPORT
@@ -452,6 +530,7 @@ CL_API_ENTRY cl_int clEnqueueWriteGlobalVariableINTEL(
452530 if (status != CL_SUCCESS ) {
453531 return status;
454532 }
533+
455534 // if (to_dev_event->execution_status != CL_COMPLETE) {
456535 // return CL_INVALID_OPERATION;
457536 // }
@@ -473,11 +552,10 @@ CL_API_ENTRY cl_int clEnqueueWriteGlobalVariableINTEL(
473552 // dev_addr_t dev_global_address =
474553 // kernel->dev_bin->get_devdef().autodiscovery_def.?
475554 uintptr_t dev_global_address = 0x4000000 ;
476- void *dev_global_ptr2 =
555+ void *dev_global_ptr =
477556 (void *)(dev_global_address + offset * 8 ); // 1 unit of offset is 8 bits
478- status =
479- set_kernel_arg_mem_pointer_without_checks (kernel, 1 , dev_global_ptr2);
480- // status = clSetKernelArgMemPointerINTEL(kernel, 1, dev_global_ptr2);
557+ status = set_kernel_arg_mem_pointer_without_checks (kernel, 1 , dev_global_ptr);
558+ // status = clSetKernelArgMemPointerINTEL(kernel, 1, dev_global_ptr);
481559 if (status != CL_SUCCESS ) {
482560 return status;
483561 }
@@ -499,24 +577,54 @@ CL_API_ENTRY cl_int clEnqueueWriteGlobalVariableINTEL(
499577
500578 if (blocking_write) {
501579 status = clWaitForEvents (1 , event);
580+ if (status == CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST ) {
581+ return status;
582+ }
583+ status = clMemFreeINTEL (command_queue->context , src_dev_ptr);
584+ if (status != CL_SUCCESS ) {
585+ return status;
586+ }
587+ status = clReleaseKernel (kernel);
588+ if (status != CL_SUCCESS ) {
589+ return status;
590+ }
591+ } else {
592+ // Clean up resources after event finishes
593+ void **callback_data = (void **)acl_malloc (sizeof (void *) * 3 );
594+ if (!callback_data) {
595+ return CL_OUT_OF_HOST_MEMORY ;
596+ }
597+ callback_data[0 ] = (void *)(src_dev_ptr);
598+ callback_data[1 ] = (void *)(kernel);
599+ clSetEventCallback (*event, CL_COMPLETE , acl_dev_global_cleanup,
600+ (void *)callback_data);
502601 }
503602
504- if (blocking_write &&
505- status == CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST ) {
506- return status;
507- }
603+ return CL_SUCCESS ;
604+ }
508605
509- // Free allocated device memory
510- status = clMemFreeINTEL (command_queue->context , src_dev_ptr);
511- if (status != CL_SUCCESS ) {
512- return status;
606+ void CL_CALLBACK acl_dev_global_cleanup (cl_event event,
607+ cl_int event_command_exec_status,
608+ void *callback_data) {
609+ void **callback_ptrs =
610+ (void **)callback_data; // callback_ptrs[0] is usm device pointer
611+ // callback_ptrs[1] kernel to be released
612+ // callback_ptrs[2] temporary event to be released
613+ event_command_exec_status =
614+ event_command_exec_status; // Avoiding Windows warning.
615+ event = event;
616+ acl_lock ();
617+ if (callback_ptrs[0 ]) {
618+ clMemFreeINTEL (event->context , callback_ptrs[0 ]);
513619 }
514- // status = clReleaseKernel(kernel);
515- // if (status != CL_SUCCESS) {
516- // return status;
517- // }
518-
519- return CL_SUCCESS ;
620+ if (callback_ptrs[1 ]) {
621+ clReleaseKernel (((cl_kernel)callback_ptrs[1 ]));
622+ }
623+ if (callback_ptrs[2 ]) {
624+ clReleaseEvent (((cl_event)callback_ptrs[2 ]));
625+ }
626+ acl_free (callback_data);
627+ acl_unlock ();
520628}
521629
522630ACL_EXPORT
0 commit comments