Skip to content

Commit dafb413

Browse files
authored
Fix stream usage in C API (#3713)
* Fix stream/order usage in C API and tests. Make most C API tests with CPU backend usable without a GPU. * Make C API CpuOnly test only run for CPU backend. * Use AccessOrder in copyX2Y. Add more CPU-only tests. * Extend the range of CPU-only native tests * Adjust cpplint for C++17 structured bindings. Signed-off-by: Michał Zientkiewicz <mzient@gmail.com>
1 parent 7ae8b15 commit dafb413

8 files changed

Lines changed: 254 additions & 179 deletions

File tree

dali/c_api/c_api.cc

Lines changed: 60 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -32,6 +32,10 @@
3232
#include "dali/pipeline/data/backend.h"
3333
#include "dali/pipeline/data/copy_to_external.h"
3434

35+
using dali::AccessOrder;
36+
using dali::CPUBackend;
37+
using dali::GPUBackend;
38+
3539
namespace {
3640

3741
bool dali_initialized = false;
@@ -95,10 +99,16 @@ void SetExternalInput(daliPipelineHandle *pipe_handle, const char *name, const v
9599
// TensorList, as we must also set the shape and type metadata.
96100
// It is passed further as const TensorList, so it's data cannot be modified.
97101
data.set_pinned(flags & DALI_ext_pinned);
102+
AccessOrder order;
103+
if (std::is_same_v<Backend, GPUBackend> || (flags & DALI_ext_pinned))
104+
order = AccessOrder(stream);
105+
else
106+
order = AccessOrder::host();
107+
data.set_order(order);
98108
data.ShareData(const_cast<void *>(data_ptr), tl_shape.num_elements() * elem_sizeof);
99109
data.Resize(tl_shape, type_id);
100110
data.SetLayout(layout);
101-
pipeline->SetExternalInput(name, data, stream,
111+
pipeline->SetExternalInput(name, data, order,
102112
flags & DALI_ext_force_sync,
103113
flags & DALI_use_copy_kernel,
104114
GetExternalSourceCopyMode(flags));
@@ -122,16 +132,24 @@ void SetExternalInputTensors(daliPipelineHandle *pipe_handle, const char *name,
122132
dali::TensorVector<Backend> data(curr_batch_size);
123133
auto type_id = static_cast<dali::DALIDataType>(data_type);
124134
auto elem_sizeof = dali::TypeTable::GetTypeInfo(type_id).size();
135+
136+
AccessOrder order;
137+
if (std::is_same_v<Backend, GPUBackend> || (flags & DALI_ext_pinned))
138+
order = AccessOrder(stream);
139+
else
140+
order = AccessOrder::host();
141+
125142
for (int i = 0; i < curr_batch_size; i++) {
126143
// We cast away the const from data_ptr, as there is no other way of passing it to the
127144
// Tensor as we must also set the shape and type metadata.
128145
// The vector that we pass to pipeline is const.
129146
data[i].set_pinned(flags & DALI_ext_pinned);
147+
data[i].set_order(order);
130148
data[i].ShareData(const_cast<void *>(data_ptr[i]), tl_shape[i].num_elements() * elem_sizeof);
131149
data[i].Resize(tl_shape[i], type_id);
132150
data[i].SetLayout(layout);
133151
}
134-
pipeline->SetExternalInput(name, data, stream,
152+
pipeline->SetExternalInput(name, data, order,
135153
flags & DALI_ext_force_sync,
136154
flags & DALI_use_copy_kernel,
137155
GetExternalSourceCopyMode(flags));
@@ -246,12 +264,12 @@ void daliSetExternalInputAsync(daliPipelineHandle *pipe_handle, const char *name
246264
unsigned int flags) {
247265
switch (device) {
248266
case device_type_t::CPU:
249-
SetExternalInput<dali::CPUBackend>(pipe_handle, name, data_ptr, data_type, shapes, sample_dim,
250-
layout_str, stream, flags);
267+
SetExternalInput<CPUBackend>(pipe_handle, name, data_ptr, data_type, shapes, sample_dim,
268+
layout_str, stream, flags);
251269
return;
252270
case device_type_t::GPU:
253-
SetExternalInput<dali::GPUBackend>(pipe_handle, name, data_ptr, data_type, shapes, sample_dim,
254-
layout_str, stream, flags);
271+
SetExternalInput<GPUBackend>(pipe_handle, name, data_ptr, data_type, shapes, sample_dim,
272+
layout_str, stream, flags);
255273
return;
256274
default:
257275
DALI_FAIL(dali::make_string("Unknown device: ", device));
@@ -276,12 +294,12 @@ void daliSetExternalInputTensorsAsync(daliPipelineHandle *pipe_handle, const cha
276294
cudaStream_t stream, unsigned int flags) {
277295
switch (device) {
278296
case device_type_t::CPU:
279-
SetExternalInputTensors<dali::CPUBackend>(pipe_handle, name, data_ptr, data_type, shapes,
280-
sample_dim, layout_str, stream, flags);
297+
SetExternalInputTensors<CPUBackend>(pipe_handle, name, data_ptr, data_type, shapes,
298+
sample_dim, layout_str, stream, flags);
281299
return;
282300
case device_type_t::GPU:
283-
SetExternalInputTensors<dali::GPUBackend>(pipe_handle, name, data_ptr, data_type, shapes,
284-
sample_dim, layout_str, stream, flags);
301+
SetExternalInputTensors<GPUBackend>(pipe_handle, name, data_ptr, data_type, shapes,
302+
sample_dim, layout_str, stream, flags);
285303
return;
286304
default:
287305
DALI_FAIL(dali::make_string("Unknown device: ", device));
@@ -317,10 +335,10 @@ void daliOutputRelease(daliPipelineHandle *pipe_handle) {
317335

318336
int64_t daliOutputHasUniformShape(daliPipelineHandle* pipe_handle, int i) {
319337
dali::DeviceWorkspace* ws = reinterpret_cast<dali::DeviceWorkspace*>(pipe_handle->ws);
320-
if (ws->OutputIsType<dali::CPUBackend>(i)) {
321-
return is_uniform(ws->Output<dali::CPUBackend>(i).shape());
338+
if (ws->OutputIsType<CPUBackend>(i)) {
339+
return is_uniform(ws->Output<CPUBackend>(i).shape());
322340
} else {
323-
return is_uniform(ws->Output<dali::GPUBackend>(i).shape());
341+
return is_uniform(ws->Output<GPUBackend>(i).shape());
324342
}
325343
}
326344

@@ -349,10 +367,10 @@ static int64_t *daliShapeAtHelper(dali::DeviceWorkspace *ws, int n, int k) {
349367

350368
static int64_t* daliShapeAtTypedHelper(daliPipelineHandle* pipe_handle, int n, int k) {
351369
dali::DeviceWorkspace* ws = reinterpret_cast<dali::DeviceWorkspace*>(pipe_handle->ws);
352-
if (ws->OutputIsType<dali::CPUBackend>(n)) {
353-
return daliShapeAtHelper<dali::CPUBackend>(ws, n, k);
370+
if (ws->OutputIsType<CPUBackend>(n)) {
371+
return daliShapeAtHelper<CPUBackend>(ws, n, k);
354372
} else {
355-
return daliShapeAtHelper<dali::GPUBackend>(ws, n, k);
373+
return daliShapeAtHelper<GPUBackend>(ws, n, k);
356374
}
357375
}
358376

@@ -373,10 +391,10 @@ static dali_data_type_t daliTypeAtHelper(dali::DeviceWorkspace* ws, int n) {
373391

374392
dali_data_type_t daliTypeAt(daliPipelineHandle* pipe_handle, int n) {
375393
dali::DeviceWorkspace* ws = reinterpret_cast<dali::DeviceWorkspace*>(pipe_handle->ws);
376-
if (ws->OutputIsType<dali::CPUBackend>(n)) {
377-
return daliTypeAtHelper<dali::CPUBackend>(ws, n);
394+
if (ws->OutputIsType<CPUBackend>(n)) {
395+
return daliTypeAtHelper<CPUBackend>(ws, n);
378396
} else {
379-
return daliTypeAtHelper<dali::GPUBackend>(ws, n);
397+
return daliTypeAtHelper<GPUBackend>(ws, n);
380398
}
381399
}
382400

@@ -388,10 +406,10 @@ static size_t daliNumTensorsHelper(dali::DeviceWorkspace* ws, int n) {
388406

389407
size_t daliNumTensors(daliPipelineHandle* pipe_handle, int n) {
390408
dali::DeviceWorkspace* ws = reinterpret_cast<dali::DeviceWorkspace*>(pipe_handle->ws);
391-
if (ws->OutputIsType<dali::CPUBackend>(n)) {
392-
return daliNumTensorsHelper<dali::CPUBackend>(ws, n);
409+
if (ws->OutputIsType<CPUBackend>(n)) {
410+
return daliNumTensorsHelper<CPUBackend>(ws, n);
393411
} else {
394-
return daliNumTensorsHelper<dali::GPUBackend>(ws, n);
412+
return daliNumTensorsHelper<GPUBackend>(ws, n);
395413
}
396414
}
397415

@@ -402,10 +420,10 @@ static size_t daliNumElementsHelper(dali::DeviceWorkspace* ws, int n) {
402420

403421
size_t daliNumElements(daliPipelineHandle* pipe_handle, int n) {
404422
dali::DeviceWorkspace* ws = reinterpret_cast<dali::DeviceWorkspace*>(pipe_handle->ws);
405-
if (ws->OutputIsType<dali::CPUBackend>(n)) {
406-
return daliNumElementsHelper<dali::CPUBackend>(ws, n);
423+
if (ws->OutputIsType<CPUBackend>(n)) {
424+
return daliNumElementsHelper<CPUBackend>(ws, n);
407425
} else {
408-
return daliNumElementsHelper<dali::GPUBackend>(ws, n);
426+
return daliNumElementsHelper<GPUBackend>(ws, n);
409427
}
410428
}
411429

@@ -416,10 +434,10 @@ static size_t daliTensorSizeHelper(dali::DeviceWorkspace* ws, int n) {
416434

417435
size_t daliTensorSize(daliPipelineHandle* pipe_handle, int n) {
418436
dali::DeviceWorkspace* ws = reinterpret_cast<dali::DeviceWorkspace*>(pipe_handle->ws);
419-
if (ws->OutputIsType<dali::CPUBackend>(n)) {
420-
return daliTensorSizeHelper<dali::CPUBackend>(ws, n);
437+
if (ws->OutputIsType<CPUBackend>(n)) {
438+
return daliTensorSizeHelper<CPUBackend>(ws, n);
421439
} else {
422-
return daliTensorSizeHelper<dali::GPUBackend>(ws, n);
440+
return daliTensorSizeHelper<GPUBackend>(ws, n);
423441
}
424442
}
425443

@@ -442,10 +460,10 @@ static size_t daliMaxDimTensorsHelper(dali::DeviceWorkspace* ws, int n) {
442460

443461
size_t daliMaxDimTensors(daliPipelineHandle* pipe_handle, int n) {
444462
dali::DeviceWorkspace* ws = reinterpret_cast<dali::DeviceWorkspace*>(pipe_handle->ws);
445-
if (ws->OutputIsType<dali::CPUBackend>(n)) {
446-
return daliMaxDimTensorsHelper<dali::CPUBackend>(ws, n);
463+
if (ws->OutputIsType<CPUBackend>(n)) {
464+
return daliMaxDimTensorsHelper<CPUBackend>(ws, n);
447465
} else {
448-
return daliMaxDimTensorsHelper<dali::GPUBackend>(ws, n);
466+
return daliMaxDimTensorsHelper<GPUBackend>(ws, n);
449467
}
450468
}
451469

@@ -480,11 +498,12 @@ void daliOutputCopy(daliPipelineHandle *pipe_handle, void *dst, int output_idx,
480498
assert(ws != nullptr);
481499

482500
auto &type_info = dali::TypeTable::GetTypeInfo(dali::DALIDataType::DALI_UINT8);
483-
if (ws->OutputIsType<dali::CPUBackend>(output_idx)) {
484-
CopyToExternal(dst, dst_mem_kind, ws->Output<dali::CPUBackend>(output_idx),
485-
stream, use_copy_kernel);
501+
if (ws->OutputIsType<CPUBackend>(output_idx)) {
502+
AccessOrder order = is_pinned ? AccessOrder(stream) : AccessOrder::host();
503+
CopyToExternal(dst, dst_mem_kind, ws->Output<CPUBackend>(output_idx),
504+
order, use_copy_kernel);
486505
} else {
487-
CopyToExternal(dst, dst_mem_kind, ws->Output<dali::GPUBackend>(output_idx),
506+
CopyToExternal(dst, dst_mem_kind, ws->Output<GPUBackend>(output_idx),
488507
stream, use_copy_kernel);
489508
}
490509
if (sync) {
@@ -505,11 +524,12 @@ void daliOutputCopySamples(daliPipelineHandle *pipe_handle, void **dsts, int out
505524
assert(ws != nullptr);
506525

507526
auto &type_info = dali::TypeTable::GetTypeInfo(dali::DALIDataType::DALI_UINT8);
508-
if (ws->OutputIsType<dali::CPUBackend>(output_idx)) {
509-
CopyToExternal(dsts, dst_mem_kind, ws->Output<dali::CPUBackend>(output_idx),
510-
stream, use_copy_kernel);
527+
if (ws->OutputIsType<CPUBackend>(output_idx)) {
528+
AccessOrder order = is_pinned ? AccessOrder(stream) : AccessOrder::host();
529+
CopyToExternal(dsts, dst_mem_kind, ws->Output<CPUBackend>(output_idx),
530+
order, use_copy_kernel);
511531
} else {
512-
CopyToExternal(dsts, dst_mem_kind, ws->Output<dali::GPUBackend>(output_idx),
532+
CopyToExternal(dsts, dst_mem_kind, ws->Output<GPUBackend>(output_idx),
513533
stream, use_copy_kernel);
514534
}
515535
if (sync) {

0 commit comments

Comments
 (0)