Skip to content

Commit e7ff412

Browse files
committed
changed InjectedApplyUpdate to return an error code
1 parent 7b0d6a1 commit e7ff412

8 files changed

Lines changed: 76 additions & 30 deletions

File tree

shared/libebm/compute/Objective.hpp

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,10 @@ template<typename TObjective,
8080
bool bUseApprox,
8181
size_t cCompilerScores,
8282
int cCompilerPack>
83-
GPU_DEVICE INLINE_RELEASE_TEMPLATED static void DoneBitpacking(
83+
GPU_DEVICE INLINE_RELEASE_TEMPLATED static ErrorEbm DoneBitpacking(
8484
const Objective* const pObjective, ApplyUpdateBridge* const pData) {
8585
const TObjective* const pObjectiveSpecific = static_cast<const TObjective*>(pObjective);
86-
pObjectiveSpecific->template InjectedApplyUpdate<bCollapsed,
86+
return pObjectiveSpecific->template InjectedApplyUpdate<bCollapsed,
8787
bValidation,
8888
bWeight,
8989
bHessian,
@@ -103,7 +103,7 @@ template<typename TObjective,
103103
size_t cCompilerScores,
104104
int cCompilerPack>
105105
struct BitPackObjective final {
106-
GPU_DEVICE INLINE_RELEASE_TEMPLATED static void Func(
106+
GPU_DEVICE INLINE_RELEASE_TEMPLATED static ErrorEbm Func(
107107
const Objective* const pObjective, ApplyUpdateBridge* const pData) {
108108

109109
static_assert(!bCollapsed, "Cannot be bCollapsed since there would be no bitpacking");
@@ -117,18 +117,21 @@ struct BitPackObjective final {
117117
if(0 != cRemnants) {
118118
pData->m_cSamples = cRemnants;
119119

120-
DoneBitpacking<TObjective,
120+
const ErrorEbm error = DoneBitpacking<TObjective,
121121
bCollapsed,
122122
bValidation,
123123
bWeight,
124124
bHessian,
125125
bUseApprox,
126126
cCompilerScores,
127127
k_cItemsPerBitPackUndefined>(pObjective, pData);
128+
if(Error_None != error) {
129+
return error;
130+
}
128131

129132
cSamples -= cRemnants;
130133
if(0 == cSamples) {
131-
return;
134+
return Error_None;
132135
}
133136
pData->m_cSamples = cSamples;
134137

@@ -174,7 +177,7 @@ struct BitPackObjective final {
174177
EBM_ASSERT(nullptr == pData->m_aSampleScores);
175178
}
176179
}
177-
DoneBitpacking<TObjective,
180+
return DoneBitpacking<TObjective,
178181
bCollapsed,
179182
bValidation,
180183
bWeight,
@@ -183,7 +186,7 @@ struct BitPackObjective final {
183186
cCompilerScores,
184187
cCompilerPack>(pObjective, pData);
185188
} else {
186-
BitPackObjective<TObjective,
189+
return BitPackObjective<TObjective,
187190
bCollapsed,
188191
bValidation,
189192
bWeight,
@@ -211,12 +214,12 @@ struct BitPackObjective<TObjective,
211214
cCompilerScores,
212215
k_cItemsPerBitPackUndefined>
213216
final {
214-
GPU_DEVICE INLINE_RELEASE_TEMPLATED static void Func(
217+
GPU_DEVICE INLINE_RELEASE_TEMPLATED static ErrorEbm Func(
215218
const Objective* const pObjective, ApplyUpdateBridge* const pData) {
216219

217220
static_assert(!bCollapsed, "Cannot be bCollapsed since there would be no bitpacking");
218221

219-
DoneBitpacking<TObjective,
222+
return DoneBitpacking<TObjective,
220223
bCollapsed,
221224
bValidation,
222225
bWeight,
@@ -237,9 +240,9 @@ template<typename TObjective,
237240
typename std::enable_if<!(bCollapsed || 1 != cCompilerScores || bUseApprox ||
238241
AccelerationFlags_NONE == TObjective::TFloatInternal::k_zone),
239242
int>::type = 0>
240-
GPU_DEVICE INLINE_RELEASE_TEMPLATED static void ApplyBitpacking(
243+
GPU_DEVICE INLINE_RELEASE_TEMPLATED static ErrorEbm ApplyBitpacking(
241244
const Objective* const pObjective, ApplyUpdateBridge* const pData) {
242-
BitPackObjective<TObjective,
245+
return BitPackObjective<TObjective,
243246
bCollapsed,
244247
bValidation,
245248
bWeight,
@@ -259,9 +262,9 @@ template<typename TObjective,
259262
typename std::enable_if<bCollapsed || 1 != cCompilerScores || bUseApprox ||
260263
AccelerationFlags_NONE == TObjective::TFloatInternal::k_zone,
261264
int>::type = 0>
262-
GPU_DEVICE INLINE_RELEASE_TEMPLATED static void ApplyBitpacking(
265+
GPU_DEVICE INLINE_RELEASE_TEMPLATED static ErrorEbm ApplyBitpacking(
263266
const Objective* const pObjective, ApplyUpdateBridge* const pData) {
264-
DoneBitpacking<TObjective,
267+
return DoneBitpacking<TObjective,
265268
bCollapsed,
266269
bValidation,
267270
bWeight,
@@ -278,9 +281,21 @@ template<typename TObjective,
278281
bool bHessian,
279282
bool bUseApprox,
280283
size_t cCompilerScores>
281-
GPU_GLOBAL static void RemoteApplyUpdate(const Objective* const pObjective, ApplyUpdateBridge* const pData) {
282-
ApplyBitpacking<TObjective, bCollapsed, bValidation, bWeight, bHessian, bUseApprox, cCompilerScores>(
283-
pObjective, pData);
284+
GPU_GLOBAL static void RemoteApplyUpdate(
285+
const Objective* const pObjective, ApplyUpdateBridge* const pData, ErrorEbm* const pError) {
286+
const ErrorEbm error =
287+
ApplyBitpacking<TObjective, bCollapsed, bValidation, bWeight, bHessian, bUseApprox, cCompilerScores>(
288+
pObjective, pData);
289+
if(Error_None != error) {
290+
#ifdef GPU_COMPILE
291+
// ErrorEbm is int32_t; CUDA's atomicCAS takes int*. Cast is safe on all supported platforms
292+
// where sizeof(int) == 4. First-error-wins: if the slot is still Error_None (0), swap in our
293+
// error; otherwise leave the earlier winner in place.
294+
atomicCAS(reinterpret_cast<int*>(pError), static_cast<int>(Error_None), static_cast<int>(error));
295+
#else
296+
*pError = error;
297+
#endif
298+
}
284299
}
285300

286301
struct Registrable {
@@ -554,7 +569,7 @@ struct Objective : public Registrable {
554569
bool bUseApprox,
555570
size_t cCompilerScores,
556571
int cCompilerPack>
557-
GPU_DEVICE NEVER_INLINE void ChildApplyUpdate(ApplyUpdateBridge* const pData) const {
572+
GPU_DEVICE NEVER_INLINE ErrorEbm ChildApplyUpdate(ApplyUpdateBridge* const pData) const {
558573
using TFloat = typename TObjective::TFloatInternal;
559574
const TObjective* const pObjective = static_cast<const TObjective*>(this);
560575

@@ -729,6 +744,7 @@ struct Objective : public Registrable {
729744
if(bValidation) {
730745
pData->m_metricOut += static_cast<double>(Sum(metricSum));
731746
}
747+
return Error_None;
732748
}
733749

734750
template<typename TObjective>
@@ -1111,8 +1127,8 @@ struct RegressionMultitaskObjective : public MultitaskObjective {
11111127
bool bUseApprox, \
11121128
size_t cCompilerScores, \
11131129
int cCompilerPack> \
1114-
GPU_DEVICE void InjectedApplyUpdate(ApplyUpdateBridge* const pData) const { \
1115-
Objective::ChildApplyUpdate<typename std::remove_pointer<decltype(this)>::type, \
1130+
GPU_DEVICE ErrorEbm InjectedApplyUpdate(ApplyUpdateBridge* const pData) const { \
1131+
return Objective::ChildApplyUpdate<typename std::remove_pointer<decltype(this)>::type, \
11161132
bCollapsed, \
11171133
bValidation, \
11181134
bWeight, \

shared/libebm/compute/avx2_ebm/avx2_32.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -736,9 +736,10 @@ struct alignas(k_cAlignment) Avx2_32_Float final {
736736
size_t cCompilerScores>
737737
INLINE_RELEASE_TEMPLATED static ErrorEbm OperatorApplyUpdate(
738738
const Objective* const pObjective, ApplyUpdateBridge* const pData) noexcept {
739+
ErrorEbm error = Error_None;
739740
RemoteApplyUpdate<TObjective, bCollapsed, bValidation, bWeight, bHessian, bUseApprox, cCompilerScores>(
740-
pObjective, pData);
741-
return Error_None;
741+
pObjective, pData, &error);
742+
return error;
742743
}
743744

744745
template<bool bHessian, bool bWeight, bool bCollapsed, size_t cCompilerScores, bool bParallel>

shared/libebm/compute/avx512f_ebm/avx512f_32.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -788,9 +788,10 @@ struct alignas(k_cAlignment) Avx512f_32_Float final {
788788
size_t cCompilerScores>
789789
INLINE_RELEASE_TEMPLATED static ErrorEbm OperatorApplyUpdate(
790790
const Objective* const pObjective, ApplyUpdateBridge* const pData) noexcept {
791+
ErrorEbm error = Error_None;
791792
RemoteApplyUpdate<TObjective, bCollapsed, bValidation, bWeight, bHessian, bUseApprox, cCompilerScores>(
792-
pObjective, pData);
793-
return Error_None;
793+
pObjective, pData, &error);
794+
return error;
794795
}
795796

796797
template<bool bHessian, bool bWeight, bool bCollapsed, size_t cCompilerScores, bool bParallel>

shared/libebm/compute/cpu_ebm/cpu_64.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -385,9 +385,10 @@ struct Cpu_64_Float final {
385385
size_t cCompilerScores>
386386
INLINE_RELEASE_TEMPLATED static ErrorEbm OperatorApplyUpdate(
387387
const Objective* const pObjective, ApplyUpdateBridge* const pData) noexcept {
388+
ErrorEbm error = Error_None;
388389
RemoteApplyUpdate<TObjective, bCollapsed, bValidation, bWeight, bHessian, bUseApprox, cCompilerScores>(
389-
pObjective, pData);
390-
return Error_None;
390+
pObjective, pData, &error);
391+
return error;
391392
}
392393

393394
template<bool bHessian, bool bWeight, bool bCollapsed, size_t cCompilerScores, bool bParallel>

shared/libebm/compute/cuda_ebm/cuda_32.cu

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,8 @@ struct Cuda_32_Float final {
363363
int * aDeviceVal2 = nullptr;
364364
int * aDeviceResult = nullptr;
365365
void * pDeviceObjective = nullptr;
366+
ErrorEbm * pDeviceError = nullptr;
367+
ErrorEbm kernelError = Error_None;
366368
cudaError_t error;
367369

368370
error = cudaSetDevice(0);
@@ -385,6 +387,16 @@ struct Cuda_32_Float final {
385387
goto exit_error;
386388
}
387389

390+
error = cudaMalloc((void **)&pDeviceError, sizeof(ErrorEbm));
391+
if(cudaSuccess != error) {
392+
goto exit_error;
393+
}
394+
395+
error = cudaMemcpy(pDeviceError, &kernelError, sizeof(ErrorEbm), cudaMemcpyHostToDevice);
396+
if(cudaSuccess != error) {
397+
goto exit_error;
398+
}
399+
388400
if(!std::is_empty<TObjective>::value) {
389401
error = cudaMalloc((void **)&pDeviceObjective, sizeof(TObjective));
390402
if(cudaSuccess != error) {
@@ -407,7 +419,7 @@ struct Cuda_32_Float final {
407419
}
408420

409421
TestGpuAdd<TObjective><<<1, k_cItems>>>(static_cast<Objective *>(pDeviceObjective), aDeviceVal1, aDeviceVal2, aDeviceResult);
410-
RemoteApplyUpdate<TObjective, cCompilerScores, bValidation, bWeight, bHessian, cCompilerPack><<<1, k_cItems>>>(pObjective, pData);
422+
RemoteApplyUpdate<TObjective, cCompilerScores, bValidation, bWeight, bHessian, cCompilerPack><<<1, k_cItems>>>(pObjective, pData, pDeviceError);
411423

412424
error = cudaGetLastError();
413425
if(cudaSuccess != error) {
@@ -424,6 +436,11 @@ struct Cuda_32_Float final {
424436
goto exit_error;
425437
}
426438

439+
error = cudaMemcpy(&kernelError, pDeviceError, sizeof(ErrorEbm), cudaMemcpyDeviceToHost);
440+
if(cudaSuccess != error) {
441+
goto exit_error;
442+
}
443+
427444
bExitError = false;
428445

429446
exit_error:
@@ -458,14 +475,21 @@ struct Cuda_32_Float final {
458475
}
459476
}
460477

478+
if(nullptr != pDeviceError) {
479+
error = cudaFree(pDeviceError);
480+
if(cudaSuccess != error) {
481+
bExitHard = true;
482+
}
483+
}
484+
461485
if(bExitHard) {
462486
bExitError = true;
463487

464488
// not much to do with the error if we fail cudaDeviceReset after failing cudaFree
465489
error = cudaDeviceReset();
466490
}
467491

468-
return bExitError ? Error_UnexpectedInternal : Error_None;
492+
return bExitError ? Error_UnexpectedInternal : kernelError;
469493
}
470494

471495

shared/libebm/compute/objectives/LogLossBinaryObjective.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ template<typename TFloat> struct LogLossBinaryObjective : BinaryObjective {
7171
bool bUseApprox,
7272
size_t cCompilerScores,
7373
int cCompilerPack>
74-
GPU_DEVICE NEVER_INLINE void InjectedApplyUpdate(ApplyUpdateBridge* const pData) const {
74+
GPU_DEVICE NEVER_INLINE ErrorEbm InjectedApplyUpdate(ApplyUpdateBridge* const pData) const {
7575
static_assert(k_oneScore == cCompilerScores, "We special case the classifiers so do not need to handle them");
7676
static_assert(!bValidation || !bHessian, "bHessian can only be true if bValidation is false");
7777
static_assert(bValidation || !bWeight, "bWeight can only be true if bValidation is true");
@@ -372,5 +372,6 @@ template<typename TFloat> struct LogLossBinaryObjective : BinaryObjective {
372372
if(bValidation) {
373373
pData->m_metricOut += static_cast<double>(Sum(metricSum));
374374
}
375+
return Error_None;
375376
}
376377
};

shared/libebm/compute/objectives/LogLossMulticlassObjective.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ template<typename TFloat> struct LogLossMulticlassObjective : MulticlassObjectiv
9393
bool bUseApprox,
9494
size_t cCompilerScores,
9595
int cCompilerPack>
96-
GPU_DEVICE NEVER_INLINE void InjectedApplyUpdate(ApplyUpdateBridge* const pData) const {
96+
GPU_DEVICE NEVER_INLINE ErrorEbm InjectedApplyUpdate(ApplyUpdateBridge* const pData) const {
9797
static_assert(k_dynamicScores == cCompilerScores || 2 <= cCompilerScores, "Multiclass needs more than 1 score");
9898
static_assert(!bValidation || !bHessian, "bHessian can only be true if bValidation is false");
9999
static_assert(bValidation || !bWeight, "bWeight can only be true if bValidation is true");
@@ -369,5 +369,6 @@ template<typename TFloat> struct LogLossMulticlassObjective : MulticlassObjectiv
369369
if(bValidation) {
370370
pData->m_metricOut += static_cast<double>(Sum(metricSum));
371371
}
372+
return Error_None;
372373
}
373374
};

shared/libebm/compute/objectives/RmseRegressionObjective.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ template<typename TFloat> struct RmseRegressionObjective : RegressionObjective {
110110
bool bUseApprox,
111111
size_t cCompilerScores,
112112
int cCompilerPack>
113-
GPU_DEVICE NEVER_INLINE void InjectedApplyUpdate(ApplyUpdateBridge* const pData) const {
113+
GPU_DEVICE NEVER_INLINE ErrorEbm InjectedApplyUpdate(ApplyUpdateBridge* const pData) const {
114114
static_assert(k_oneScore == cCompilerScores, "for RMSE regression there should always be one score");
115115
static_assert(!bHessian, "for RMSE regression we should never need the hessians");
116116
static_assert(bValidation || !bWeight, "bWeight can only be true if bValidation is true");
@@ -271,5 +271,6 @@ template<typename TFloat> struct RmseRegressionObjective : RegressionObjective {
271271
if(bValidation) {
272272
pData->m_metricOut += static_cast<double>(Sum(metricSum));
273273
}
274+
return Error_None;
274275
}
275276
};

0 commit comments

Comments
 (0)