Skip to content

Commit 779eb39

Browse files
authored
Support for runtime CPU/CUDA selection (#3060)
CUDA use in dlib can now be toggled at runtime. Whether CUDA is used is decided once per process, the first time dlib queries it (effectively at program startup), based on CUDA availability. So if you build dlib with CUDA support and no GPU is present, dlib automatically runs without a GPU. If you do have a GPU and dlib is built with CUDA support but you don't want to use it, you can either set the `CUDA_VISIBLE_DEVICES` env var to empty so that no GPU is visible, or set the `DLIB_DISABLE_CUDA_USE` env var to a true value (anything other than empty, `0`, `false`, `False`, or `FALSE`, e.g. `1`) and dlib won't use CUDA.
1 parent 173d93e commit 779eb39

14 files changed

Lines changed: 1508 additions & 1052 deletions

dlib/cuda/cuda_dlib.cu

Lines changed: 66 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,42 @@
55
#include "cuda_dlib.h"
66
#include "cudnn_dlibapi.h"
77
#include <math_constants.h>
8+
#include <cstdlib>
9+
#include <cstring>
810

911

1012
namespace dlib
1113
{
14+
namespace
15+
{
16+
bool cuda_device_available (
17+
)
18+
{
19+
int num_devices;
20+
return cudaGetDeviceCount(&num_devices) == cudaSuccess && num_devices > 0;
21+
}
22+
23+
bool cuda_disabled_by_environment (
)
{
    // Returns true when the DLIB_DISABLE_CUDA_USE environment variable is set
    // to a value that asks for CUDA to be turned off.  An unset variable, or
    // one holding any of the "falsy" spellings listed below, leaves CUDA
    // enabled.  The comparison is exact and case-sensitive, so any other
    // value (e.g. "1", "true", "fAlSe") counts as a request to disable.
    const char* const value = std::getenv("DLIB_DISABLE_CUDA_USE");
    if (value == nullptr)
        return false;

    static const char* const not_disabled_spellings[] = { "", "0", "false", "False", "FALSE" };
    for (const char* spelling : not_disabled_spellings)
    {
        if (std::strcmp(value, spelling) == 0)
            return false;
    }
    return true;
}
34+
35+
bool use_cuda_impl (
36+
)
37+
{
38+
static const bool var = !cuda_disabled_by_environment() && cuda_device_available();
39+
return var;
40+
}
41+
42+
}
43+
1244
namespace cuda
1345
{
1446

@@ -18,21 +50,34 @@ namespace dlib
1850
int dev
1951
)
2052
{
53+
if (!use_cuda())
54+
{
55+
DLIB_CASSERT(dev == 0, "dlib::cuda::set_device(id) called with an invalid device id.");
56+
return;
57+
}
58+
2159
CHECK_CUDA(cudaSetDevice(dev));
2260
}
2361

2462
int get_device (
2563
)
2664
{
2765
int dev = 0;
28-
CHECK_CUDA(cudaGetDevice(&dev));
66+
if (use_cuda())
67+
CHECK_CUDA(cudaGetDevice(&dev));
2968
return dev;
3069
}
3170

3271
std::string get_device_name (
3372
int device
3473
)
3574
{
75+
if (!use_cuda())
76+
{
77+
DLIB_CASSERT(device == 0, "dlib::cuda::get_device_name(device) called with an invalid device id.");
78+
return "CUDA_DISABLED";
79+
}
80+
3681
cudaDeviceProp props;
3782
CHECK_CUDA(cudaGetDeviceProperties(&props, device));
3883
return props.name;
@@ -41,19 +86,32 @@ namespace dlib
4186
void set_current_device_blocking_sync(
4287
)
4388
{
44-
CHECK_CUDA(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
89+
if (use_cuda())
90+
CHECK_CUDA(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
91+
}
92+
93+
bool use_cuda(
94+
)
95+
{
96+
return use_cuda_impl();
4597
}
4698

4799
int get_num_devices (
48100
)
49101
{
102+
if (!use_cuda())
103+
return 0;
104+
50105
int num_devices;
51106
CHECK_CUDA(cudaGetDeviceCount(&num_devices));
52107
return num_devices;
53108
}
54109

55110
bool can_access_peer (int device_id, int peer_device_id)
56111
{
112+
if (!use_cuda())
113+
return false;
114+
57115
int can_access;
58116
CHECK_CUDA(cudaDeviceCanAccessPeer(&can_access, device_id, peer_device_id));
59117
return can_access != 0;
@@ -65,6 +123,9 @@ namespace dlib
65123

66124
void device_synchronize (int dev)
67125
{
126+
if (!use_cuda())
127+
return;
128+
68129
raii_set_device set_dev(dev);
69130
CHECK_CUDA(cudaDeviceSynchronize());
70131
}
@@ -76,6 +137,9 @@ namespace dlib
76137
int peer_device_id
77138
) : call_disable(false), device_id(device_id), peer_device_id(peer_device_id)
78139
{
140+
if (!use_cuda())
141+
return;
142+
79143
raii_set_device set_dev(device_id);
80144

81145
auto err = cudaDeviceEnablePeerAccess(peer_device_id, 0);
@@ -3220,4 +3284,3 @@ namespace dlib
32203284

32213285
}
32223286
}
3223-

dlib/cuda/cuda_dlib.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ namespace dlib
2525
int get_num_devices (
2626
);
2727

28+
bool use_cuda(
29+
);
30+
2831
std::string get_device_name (
2932
int device
3033
);
@@ -942,6 +945,9 @@ namespace dlib
942945
inline int get_num_devices (
)
{
    // Stub that always reports a single device.
    // NOTE(review): presumably this is the fallback used when dlib is built
    // without CUDA support -- confirm against the enclosing preprocessor block.
    return 1;
}
944947

948+
inline bool use_cuda(
)
{
    // Stub that always reports CUDA as unused.
    // NOTE(review): presumably this is the fallback used when dlib is built
    // without CUDA support -- confirm against the enclosing preprocessor block.
    return false;
}
950+
945951
inline std::string get_device_name (
946952
int device
947953
)

dlib/cuda/curand_dlibapi.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#ifdef DLIB_USE_CUDA
77

88
#include "curand_dlibapi.h"
9+
#include "cuda_dlib.h"
910
#include <curand.h>
1011
#include "../string.h"
1112

@@ -47,11 +48,14 @@ namespace dlib
4748
unsigned long long seed
4849
) : handle(nullptr)
4950
{
50-
curandGenerator_t gen;
51-
CHECK_CURAND(curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT));
52-
handle = gen;
51+
if (use_cuda())
52+
{
53+
curandGenerator_t gen;
54+
CHECK_CURAND(curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT));
55+
handle = gen;
5356

54-
CHECK_CURAND(curandSetPseudoRandomGeneratorSeed(gen, seed));
57+
CHECK_CURAND(curandSetPseudoRandomGeneratorSeed(gen, seed));
58+
}
5559
}
5660

5761
curand_generator::

dlib/cuda/gpu_data.cpp

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,19 @@ namespace dlib
5454
}
5555
else
5656
{
57+
if (!cuda::use_cuda())
58+
{
59+
std::memcpy(dest.host()+dest_offset, src.host()+src_offset, num*sizeof(float));
60+
return;
61+
}
62+
5763
// if we write to the entire thing then we can use device_write_only()
5864
if (dest_offset == 0 && num == dest.size())
5965
{
6066
// copy the memory efficiently based on which copy is current in each object.
6167
if (src.device_ready())
6268
CHECK_CUDA(cudaMemcpy(dest.device_write_only(), src.device()+src_offset, num*sizeof(float), cudaMemcpyDeviceToDevice));
63-
else
69+
else
6470
CHECK_CUDA(cudaMemcpy(dest.device_write_only(), src.host()+src_offset, num*sizeof(float), cudaMemcpyHostToDevice));
6571
}
6672
else
@@ -72,7 +78,7 @@ namespace dlib
7278
CHECK_CUDA(cudaMemcpy(dest.host()+dest_offset, src.device()+src_offset, num*sizeof(float), cudaMemcpyDeviceToHost));
7379
else if (dest.device_ready() && !src.device_ready())
7480
CHECK_CUDA(cudaMemcpy(dest.device()+dest_offset, src.host()+src_offset, num*sizeof(float), cudaMemcpyHostToDevice));
75-
else
81+
else
7682
CHECK_CUDA(cudaMemcpy(dest.host()+dest_offset, src.host()+src_offset, num*sizeof(float), cudaMemcpyHostToHost));
7783
}
7884
}
@@ -147,6 +153,9 @@ namespace dlib
147153
void gpu_data::
148154
async_copy_to_device() const
149155
{
156+
if (!cuda::use_cuda())
157+
return;
158+
150159
if (!device_current)
151160
{
152161
if (device_in_use)
@@ -181,6 +190,7 @@ namespace dlib
181190
host_current = true;
182191
device_current = true;
183192
device_in_use = false;
193+
the_device_id = 0;
184194
data_host.reset();
185195
data_device.reset();
186196
}
@@ -199,6 +209,13 @@ namespace dlib
199209
device_current = true;
200210
device_in_use = false;
201211

212+
if (!cuda::use_cuda())
213+
{
214+
data_host.reset(new float[new_size], std::default_delete<float[]>());
215+
the_device_id = 0;
216+
return;
217+
}
218+
202219
try
203220
{
204221
CHECK_CUDA(cudaGetDevice(&the_device_id));
@@ -251,4 +268,3 @@ namespace dlib
251268
#endif // DLIB_USE_CUDA
252269

253270
#endif // DLIB_GPU_DaTA_CPP_
254-

dlib/cuda/gpu_data.h

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,14 @@
1212
namespace dlib
1313
{
1414

15+
// ----------------------------------------------------------------------------------------
16+
17+
namespace cuda
18+
{
19+
bool use_cuda(
20+
);
21+
}
22+
1523
// ----------------------------------------------------------------------------------------
1624

1725
class gpu_data
@@ -108,6 +116,8 @@ namespace dlib
108116
{
109117
#ifndef DLIB_USE_CUDA
110118
DLIB_CASSERT(false, "CUDA NOT ENABLED");
119+
#else
120+
DLIB_CASSERT(cuda::use_cuda(), "CUDA disabled");
111121
#endif
112122
copy_to_device();
113123
device_in_use = true;
@@ -118,6 +128,8 @@ namespace dlib
118128
{
119129
#ifndef DLIB_USE_CUDA
120130
DLIB_CASSERT(false, "CUDA NOT ENABLED");
131+
#else
132+
DLIB_CASSERT(cuda::use_cuda(), "CUDA disabled");
121133
#endif
122134
copy_to_device();
123135
host_current = false;
@@ -129,6 +141,8 @@ namespace dlib
129141
{
130142
#ifndef DLIB_USE_CUDA
131143
DLIB_CASSERT(false, "CUDA NOT ENABLED");
144+
#else
145+
DLIB_CASSERT(cuda::use_cuda(), "CUDA disabled");
132146
#endif
133147
wait_for_transfer_to_finish();
134148
host_current = false;
@@ -141,7 +155,14 @@ namespace dlib
141155
) const { return host_current; }
142156

143157
bool device_ready (
144-
) const { return device_current && !have_active_transfer; }
158+
) const
159+
{
160+
#ifdef DLIB_USE_CUDA
161+
if (!cuda::use_cuda() && size() != 0)
162+
return false;
163+
#endif
164+
return device_current && !have_active_transfer;
165+
}
145166

146167
size_t size() const { return data_size; }
147168

@@ -263,4 +284,3 @@ namespace dlib
263284
}
264285

265286
#endif // DLIB_GPU_DaTA_H_
266-

dlib/cuda/gpu_data_abstract.h

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,10 @@ namespace dlib
2828
to the host do not happen before the relevant computations have completed.
2929
3030
If DLIB_USE_CUDA is not #defined then this object will not use CUDA at all.
31-
Instead, it will simply store one host side memory block of floats.
31+
Instead, it will simply store one host side memory block of floats.
32+
Similarly, if DLIB_USE_CUDA is #defined but cuda::use_cuda() == false,
33+
then this object will be host only and will not allocate a CUDA device
34+
memory block.
3235
3336
THREAD SAFETY
3437
Instances of this object are not thread-safe. So don't touch one from
@@ -67,6 +70,8 @@ namespace dlib
6770
);
6871
/*!
6972
ensures
73+
- if (cuda::use_cuda() == false) then
74+
- this function does nothing.
7075
- if (!device_ready()) then
7176
- Begins asynchronously copying host data to the device once it is safe
7277
to do so. I.e. This function will wait until any previously
@@ -99,10 +104,12 @@ namespace dlib
99104
) const;
100105
/*!
101106
ensures
102-
- returns true if and only if the device's copy of the data is current.
107+
- returns true if and only if the device's copy of the data exists and is current.
103108
The device's data is current if there aren't any modifications to the
104109
data which were made on the host side that have yet to be copied to the
105110
device.
111+
- if (DLIB_USE_CUDA is defined && cuda::use_cuda() == false && size() != 0) then
112+
- returns false.
106113
!*/
107114

108115
const float* host(
@@ -153,6 +160,7 @@ namespace dlib
153160
/*!
154161
requires
155162
- DLIB_USE_CUDA is #defined
163+
- cuda::use_cuda() == true
156164
ensures
157165
- returns a pointer to the device memory block of size() contiguous float
158166
values or nullptr if size()==0.
@@ -167,6 +175,7 @@ namespace dlib
167175
/*!
168176
requires
169177
- DLIB_USE_CUDA is #defined
178+
- cuda::use_cuda() == true
170179
ensures
171180
- returns a pointer to the device memory block of size() contiguous float
172181
values or nullptr if size()==0.
@@ -182,6 +191,7 @@ namespace dlib
182191
/*!
183192
requires
184193
- DLIB_USE_CUDA is #defined
194+
- cuda::use_cuda() == true
185195
ensures
186196
- This function returns the same pointer as device(), except that it never
187197
performs a host to device memory copy. Instead, it immediately marks the
@@ -263,4 +273,3 @@ namespace dlib
263273
}
264274

265275
#endif // DLIB_GPU_DaTA_ABSTRACT_H_
266-

0 commit comments

Comments
 (0)