1- // Copyright (c) 2017-2021 , NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+ // Copyright (c) 2017-2022 , NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22//
33// Licensed under the Apache License, Version 2.0 (the "License");
44// you may not use this file except in compliance with the License.
3232#include " dali/pipeline/data/backend.h"
3333#include " dali/pipeline/data/copy_to_external.h"
3434
35+ using dali::AccessOrder;
36+ using dali::CPUBackend;
37+ using dali::GPUBackend;
38+
3539namespace {
3640
3741bool dali_initialized = false ;
@@ -95,10 +99,16 @@ void SetExternalInput(daliPipelineHandle *pipe_handle, const char *name, const v
9599 // TensorList, as we must also set the shape and type metadata.
96100 // It is passed further as const TensorList, so it's data cannot be modified.
97101 data.set_pinned (flags & DALI_ext_pinned);
102+ AccessOrder order;
103+ if (std::is_same_v<Backend, GPUBackend> || (flags & DALI_ext_pinned))
104+ order = AccessOrder (stream);
105+ else
106+ order = AccessOrder::host ();
107+ data.set_order (order);
98108 data.ShareData (const_cast <void *>(data_ptr), tl_shape.num_elements () * elem_sizeof);
99109 data.Resize (tl_shape, type_id);
100110 data.SetLayout (layout);
101- pipeline->SetExternalInput (name, data, stream ,
111+ pipeline->SetExternalInput (name, data, order ,
102112 flags & DALI_ext_force_sync,
103113 flags & DALI_use_copy_kernel,
104114 GetExternalSourceCopyMode (flags));
@@ -122,16 +132,24 @@ void SetExternalInputTensors(daliPipelineHandle *pipe_handle, const char *name,
122132 dali::TensorVector<Backend> data (curr_batch_size);
123133 auto type_id = static_cast <dali::DALIDataType>(data_type);
124134 auto elem_sizeof = dali::TypeTable::GetTypeInfo (type_id).size ();
135+
136+ AccessOrder order;
137+ if (std::is_same_v<Backend, GPUBackend> || (flags & DALI_ext_pinned))
138+ order = AccessOrder (stream);
139+ else
140+ order = AccessOrder::host ();
141+
125142 for (int i = 0 ; i < curr_batch_size; i++) {
126143 // We cast away the const from data_ptr, as there is no other way of passing it to the
127144 // Tensor as we must also set the shape and type metadata.
128145 // The vector that we pass to pipeline is const.
129146 data[i].set_pinned (flags & DALI_ext_pinned);
147+ data[i].set_order (order);
130148 data[i].ShareData (const_cast <void *>(data_ptr[i]), tl_shape[i].num_elements () * elem_sizeof);
131149 data[i].Resize (tl_shape[i], type_id);
132150 data[i].SetLayout (layout);
133151 }
134- pipeline->SetExternalInput (name, data, stream ,
152+ pipeline->SetExternalInput (name, data, order ,
135153 flags & DALI_ext_force_sync,
136154 flags & DALI_use_copy_kernel,
137155 GetExternalSourceCopyMode (flags));
@@ -246,12 +264,12 @@ void daliSetExternalInputAsync(daliPipelineHandle *pipe_handle, const char *name
246264 unsigned int flags) {
247265 switch (device) {
248266 case device_type_t ::CPU:
249- SetExternalInput<dali:: CPUBackend>(pipe_handle, name, data_ptr, data_type, shapes, sample_dim,
250- layout_str, stream, flags);
267+ SetExternalInput<CPUBackend>(pipe_handle, name, data_ptr, data_type, shapes, sample_dim,
268+ layout_str, stream, flags);
251269 return ;
252270 case device_type_t ::GPU:
253- SetExternalInput<dali:: GPUBackend>(pipe_handle, name, data_ptr, data_type, shapes, sample_dim,
254- layout_str, stream, flags);
271+ SetExternalInput<GPUBackend>(pipe_handle, name, data_ptr, data_type, shapes, sample_dim,
272+ layout_str, stream, flags);
255273 return ;
256274 default :
257275 DALI_FAIL (dali::make_string (" Unknown device: " , device));
@@ -276,12 +294,12 @@ void daliSetExternalInputTensorsAsync(daliPipelineHandle *pipe_handle, const cha
276294 cudaStream_t stream, unsigned int flags) {
277295 switch (device) {
278296 case device_type_t ::CPU:
279- SetExternalInputTensors<dali:: CPUBackend>(pipe_handle, name, data_ptr, data_type, shapes,
280- sample_dim, layout_str, stream, flags);
297+ SetExternalInputTensors<CPUBackend>(pipe_handle, name, data_ptr, data_type, shapes,
298+ sample_dim, layout_str, stream, flags);
281299 return ;
282300 case device_type_t ::GPU:
283- SetExternalInputTensors<dali:: GPUBackend>(pipe_handle, name, data_ptr, data_type, shapes,
284- sample_dim, layout_str, stream, flags);
301+ SetExternalInputTensors<GPUBackend>(pipe_handle, name, data_ptr, data_type, shapes,
302+ sample_dim, layout_str, stream, flags);
285303 return ;
286304 default :
287305 DALI_FAIL (dali::make_string (" Unknown device: " , device));
@@ -317,10 +335,10 @@ void daliOutputRelease(daliPipelineHandle *pipe_handle) {
317335
318336int64_t daliOutputHasUniformShape (daliPipelineHandle* pipe_handle, int i) {
319337 dali::DeviceWorkspace* ws = reinterpret_cast <dali::DeviceWorkspace*>(pipe_handle->ws );
320- if (ws->OutputIsType <dali:: CPUBackend>(i)) {
321- return is_uniform (ws->Output <dali:: CPUBackend>(i).shape ());
338+ if (ws->OutputIsType <CPUBackend>(i)) {
339+ return is_uniform (ws->Output <CPUBackend>(i).shape ());
322340 } else {
323- return is_uniform (ws->Output <dali:: GPUBackend>(i).shape ());
341+ return is_uniform (ws->Output <GPUBackend>(i).shape ());
324342 }
325343}
326344
@@ -349,10 +367,10 @@ static int64_t *daliShapeAtHelper(dali::DeviceWorkspace *ws, int n, int k) {
349367
350368static int64_t * daliShapeAtTypedHelper (daliPipelineHandle* pipe_handle, int n, int k) {
351369 dali::DeviceWorkspace* ws = reinterpret_cast <dali::DeviceWorkspace*>(pipe_handle->ws );
352- if (ws->OutputIsType <dali:: CPUBackend>(n)) {
353- return daliShapeAtHelper<dali:: CPUBackend>(ws, n, k);
370+ if (ws->OutputIsType <CPUBackend>(n)) {
371+ return daliShapeAtHelper<CPUBackend>(ws, n, k);
354372 } else {
355- return daliShapeAtHelper<dali:: GPUBackend>(ws, n, k);
373+ return daliShapeAtHelper<GPUBackend>(ws, n, k);
356374 }
357375}
358376
@@ -373,10 +391,10 @@ static dali_data_type_t daliTypeAtHelper(dali::DeviceWorkspace* ws, int n) {
373391
374392dali_data_type_t daliTypeAt (daliPipelineHandle* pipe_handle, int n) {
375393 dali::DeviceWorkspace* ws = reinterpret_cast <dali::DeviceWorkspace*>(pipe_handle->ws );
376- if (ws->OutputIsType <dali:: CPUBackend>(n)) {
377- return daliTypeAtHelper<dali:: CPUBackend>(ws, n);
394+ if (ws->OutputIsType <CPUBackend>(n)) {
395+ return daliTypeAtHelper<CPUBackend>(ws, n);
378396 } else {
379- return daliTypeAtHelper<dali:: GPUBackend>(ws, n);
397+ return daliTypeAtHelper<GPUBackend>(ws, n);
380398 }
381399}
382400
@@ -388,10 +406,10 @@ static size_t daliNumTensorsHelper(dali::DeviceWorkspace* ws, int n) {
388406
389407size_t daliNumTensors (daliPipelineHandle* pipe_handle, int n) {
390408 dali::DeviceWorkspace* ws = reinterpret_cast <dali::DeviceWorkspace*>(pipe_handle->ws );
391- if (ws->OutputIsType <dali:: CPUBackend>(n)) {
392- return daliNumTensorsHelper<dali:: CPUBackend>(ws, n);
409+ if (ws->OutputIsType <CPUBackend>(n)) {
410+ return daliNumTensorsHelper<CPUBackend>(ws, n);
393411 } else {
394- return daliNumTensorsHelper<dali:: GPUBackend>(ws, n);
412+ return daliNumTensorsHelper<GPUBackend>(ws, n);
395413 }
396414}
397415
@@ -402,10 +420,10 @@ static size_t daliNumElementsHelper(dali::DeviceWorkspace* ws, int n) {
402420
403421size_t daliNumElements (daliPipelineHandle* pipe_handle, int n) {
404422 dali::DeviceWorkspace* ws = reinterpret_cast <dali::DeviceWorkspace*>(pipe_handle->ws );
405- if (ws->OutputIsType <dali:: CPUBackend>(n)) {
406- return daliNumElementsHelper<dali:: CPUBackend>(ws, n);
423+ if (ws->OutputIsType <CPUBackend>(n)) {
424+ return daliNumElementsHelper<CPUBackend>(ws, n);
407425 } else {
408- return daliNumElementsHelper<dali:: GPUBackend>(ws, n);
426+ return daliNumElementsHelper<GPUBackend>(ws, n);
409427 }
410428}
411429
@@ -416,10 +434,10 @@ static size_t daliTensorSizeHelper(dali::DeviceWorkspace* ws, int n) {
416434
417435size_t daliTensorSize (daliPipelineHandle* pipe_handle, int n) {
418436 dali::DeviceWorkspace* ws = reinterpret_cast <dali::DeviceWorkspace*>(pipe_handle->ws );
419- if (ws->OutputIsType <dali:: CPUBackend>(n)) {
420- return daliTensorSizeHelper<dali:: CPUBackend>(ws, n);
437+ if (ws->OutputIsType <CPUBackend>(n)) {
438+ return daliTensorSizeHelper<CPUBackend>(ws, n);
421439 } else {
422- return daliTensorSizeHelper<dali:: GPUBackend>(ws, n);
440+ return daliTensorSizeHelper<GPUBackend>(ws, n);
423441 }
424442}
425443
@@ -442,10 +460,10 @@ static size_t daliMaxDimTensorsHelper(dali::DeviceWorkspace* ws, int n) {
442460
443461size_t daliMaxDimTensors (daliPipelineHandle* pipe_handle, int n) {
444462 dali::DeviceWorkspace* ws = reinterpret_cast <dali::DeviceWorkspace*>(pipe_handle->ws );
445- if (ws->OutputIsType <dali:: CPUBackend>(n)) {
446- return daliMaxDimTensorsHelper<dali:: CPUBackend>(ws, n);
463+ if (ws->OutputIsType <CPUBackend>(n)) {
464+ return daliMaxDimTensorsHelper<CPUBackend>(ws, n);
447465 } else {
448- return daliMaxDimTensorsHelper<dali:: GPUBackend>(ws, n);
466+ return daliMaxDimTensorsHelper<GPUBackend>(ws, n);
449467 }
450468}
451469
@@ -480,11 +498,12 @@ void daliOutputCopy(daliPipelineHandle *pipe_handle, void *dst, int output_idx,
480498 assert (ws != nullptr );
481499
482500 auto &type_info = dali::TypeTable::GetTypeInfo (dali::DALIDataType::DALI_UINT8);
483- if (ws->OutputIsType <dali::CPUBackend>(output_idx)) {
484- CopyToExternal (dst, dst_mem_kind, ws->Output <dali::CPUBackend>(output_idx),
485- stream, use_copy_kernel);
501+ if (ws->OutputIsType <CPUBackend>(output_idx)) {
502+ AccessOrder order = is_pinned ? AccessOrder (stream) : AccessOrder::host ();
503+ CopyToExternal (dst, dst_mem_kind, ws->Output <CPUBackend>(output_idx),
504+ order, use_copy_kernel);
486505 } else {
487- CopyToExternal (dst, dst_mem_kind, ws->Output <dali:: GPUBackend>(output_idx),
506+ CopyToExternal (dst, dst_mem_kind, ws->Output <GPUBackend>(output_idx),
488507 stream, use_copy_kernel);
489508 }
490509 if (sync) {
@@ -505,11 +524,12 @@ void daliOutputCopySamples(daliPipelineHandle *pipe_handle, void **dsts, int out
505524 assert (ws != nullptr );
506525
507526 auto &type_info = dali::TypeTable::GetTypeInfo (dali::DALIDataType::DALI_UINT8);
508- if (ws->OutputIsType <dali::CPUBackend>(output_idx)) {
509- CopyToExternal (dsts, dst_mem_kind, ws->Output <dali::CPUBackend>(output_idx),
510- stream, use_copy_kernel);
527+ if (ws->OutputIsType <CPUBackend>(output_idx)) {
528+ AccessOrder order = is_pinned ? AccessOrder (stream) : AccessOrder::host ();
529+ CopyToExternal (dsts, dst_mem_kind, ws->Output <CPUBackend>(output_idx),
530+ order, use_copy_kernel);
511531 } else {
512- CopyToExternal (dsts, dst_mem_kind, ws->Output <dali:: GPUBackend>(output_idx),
532+ CopyToExternal (dsts, dst_mem_kind, ws->Output <GPUBackend>(output_idx),
513533 stream, use_copy_kernel);
514534 }
515535 if (sync) {
0 commit comments