@@ -80,10 +80,10 @@ template<typename TObjective,
8080 bool bUseApprox,
8181 size_t cCompilerScores,
8282 int cCompilerPack>
83- GPU_DEVICE INLINE_RELEASE_TEMPLATED static void DoneBitpacking (
83+ GPU_DEVICE INLINE_RELEASE_TEMPLATED static ErrorEbm DoneBitpacking (
8484 const Objective* const pObjective, ApplyUpdateBridge* const pData) {
8585 const TObjective* const pObjectiveSpecific = static_cast <const TObjective*>(pObjective);
86- pObjectiveSpecific->template InjectedApplyUpdate <bCollapsed,
86+ return pObjectiveSpecific->template InjectedApplyUpdate <bCollapsed,
8787 bValidation,
8888 bWeight,
8989 bHessian,
@@ -103,7 +103,7 @@ template<typename TObjective,
103103 size_t cCompilerScores,
104104 int cCompilerPack>
105105struct BitPackObjective final {
106- GPU_DEVICE INLINE_RELEASE_TEMPLATED static void Func (
106+ GPU_DEVICE INLINE_RELEASE_TEMPLATED static ErrorEbm Func (
107107 const Objective* const pObjective, ApplyUpdateBridge* const pData) {
108108
109109 static_assert (!bCollapsed, " Cannot be bCollapsed since there would be no bitpacking" );
@@ -117,18 +117,21 @@ struct BitPackObjective final {
117117 if (0 != cRemnants) {
118118 pData->m_cSamples = cRemnants;
119119
120- DoneBitpacking<TObjective,
120+ const ErrorEbm error = DoneBitpacking<TObjective,
121121 bCollapsed,
122122 bValidation,
123123 bWeight,
124124 bHessian,
125125 bUseApprox,
126126 cCompilerScores,
127127 k_cItemsPerBitPackUndefined>(pObjective, pData);
128+ if (Error_None != error) {
129+ return error;
130+ }
128131
129132 cSamples -= cRemnants;
130133 if (0 == cSamples) {
131- return ;
134+ return Error_None ;
132135 }
133136 pData->m_cSamples = cSamples;
134137
@@ -174,7 +177,7 @@ struct BitPackObjective final {
174177 EBM_ASSERT (nullptr == pData->m_aSampleScores );
175178 }
176179 }
177- DoneBitpacking<TObjective,
180+ return DoneBitpacking<TObjective,
178181 bCollapsed,
179182 bValidation,
180183 bWeight,
@@ -183,7 +186,7 @@ struct BitPackObjective final {
183186 cCompilerScores,
184187 cCompilerPack>(pObjective, pData);
185188 } else {
186- BitPackObjective<TObjective,
189+ return BitPackObjective<TObjective,
187190 bCollapsed,
188191 bValidation,
189192 bWeight,
@@ -211,12 +214,12 @@ struct BitPackObjective<TObjective,
211214 cCompilerScores,
212215 k_cItemsPerBitPackUndefined>
213216 final {
214- GPU_DEVICE INLINE_RELEASE_TEMPLATED static void Func (
217+ GPU_DEVICE INLINE_RELEASE_TEMPLATED static ErrorEbm Func (
215218 const Objective* const pObjective, ApplyUpdateBridge* const pData) {
216219
217220 static_assert (!bCollapsed, " Cannot be bCollapsed since there would be no bitpacking" );
218221
219- DoneBitpacking<TObjective,
222+ return DoneBitpacking<TObjective,
220223 bCollapsed,
221224 bValidation,
222225 bWeight,
@@ -237,9 +240,9 @@ template<typename TObjective,
237240 typename std::enable_if<!(bCollapsed || 1 != cCompilerScores || bUseApprox ||
238241 AccelerationFlags_NONE == TObjective::TFloatInternal::k_zone),
239242 int >::type = 0 >
240- GPU_DEVICE INLINE_RELEASE_TEMPLATED static void ApplyBitpacking (
243+ GPU_DEVICE INLINE_RELEASE_TEMPLATED static ErrorEbm ApplyBitpacking (
241244 const Objective* const pObjective, ApplyUpdateBridge* const pData) {
242- BitPackObjective<TObjective,
245+ return BitPackObjective<TObjective,
243246 bCollapsed,
244247 bValidation,
245248 bWeight,
@@ -259,9 +262,9 @@ template<typename TObjective,
259262 typename std::enable_if<bCollapsed || 1 != cCompilerScores || bUseApprox ||
260263 AccelerationFlags_NONE == TObjective::TFloatInternal::k_zone,
261264 int >::type = 0 >
262- GPU_DEVICE INLINE_RELEASE_TEMPLATED static void ApplyBitpacking (
265+ GPU_DEVICE INLINE_RELEASE_TEMPLATED static ErrorEbm ApplyBitpacking (
263266 const Objective* const pObjective, ApplyUpdateBridge* const pData) {
264- DoneBitpacking<TObjective,
267+ return DoneBitpacking<TObjective,
265268 bCollapsed,
266269 bValidation,
267270 bWeight,
@@ -278,9 +281,21 @@ template<typename TObjective,
278281 bool bHessian,
279282 bool bUseApprox,
280283 size_t cCompilerScores>
281- GPU_GLOBAL static void RemoteApplyUpdate (const Objective* const pObjective, ApplyUpdateBridge* const pData) {
282- ApplyBitpacking<TObjective, bCollapsed, bValidation, bWeight, bHessian, bUseApprox, cCompilerScores>(
283- pObjective, pData);
284+ GPU_GLOBAL static void RemoteApplyUpdate (
285+ const Objective* const pObjective, ApplyUpdateBridge* const pData, ErrorEbm* const pError) {
286+ const ErrorEbm error =
287+ ApplyBitpacking<TObjective, bCollapsed, bValidation, bWeight, bHessian, bUseApprox, cCompilerScores>(
288+ pObjective, pData);
289+ if (Error_None != error) {
290+ #ifdef GPU_COMPILE
291+ // ErrorEbm is int32_t; CUDA's atomicCAS takes int*. Cast is safe on all supported platforms
292+ // where sizeof(int) == 4. First-error-wins: if the slot is still Error_None (0), swap in our
293+ // error; otherwise leave the earlier winner in place.
294+ atomicCAS (reinterpret_cast <int *>(pError), static_cast <int >(Error_None), static_cast <int >(error));
295+ #else
296+ *pError = error;
297+ #endif
298+ }
284299}
285300
286301struct Registrable {
@@ -554,7 +569,7 @@ struct Objective : public Registrable {
554569 bool bUseApprox,
555570 size_t cCompilerScores,
556571 int cCompilerPack>
557- GPU_DEVICE NEVER_INLINE void ChildApplyUpdate (ApplyUpdateBridge* const pData) const {
572+ GPU_DEVICE NEVER_INLINE ErrorEbm ChildApplyUpdate (ApplyUpdateBridge* const pData) const {
558573 using TFloat = typename TObjective::TFloatInternal;
559574 const TObjective* const pObjective = static_cast <const TObjective*>(this );
560575
@@ -729,6 +744,7 @@ struct Objective : public Registrable {
729744 if (bValidation) {
730745 pData->m_metricOut += static_cast <double >(Sum (metricSum));
731746 }
747+ return Error_None;
732748 }
733749
734750 template <typename TObjective>
@@ -1111,8 +1127,8 @@ struct RegressionMultitaskObjective : public MultitaskObjective {
11111127 bool bUseApprox, \
11121128 size_t cCompilerScores, \
11131129 int cCompilerPack> \
1114- GPU_DEVICE void InjectedApplyUpdate (ApplyUpdateBridge* const pData) const { \
1115- Objective::ChildApplyUpdate<typename std::remove_pointer<decltype (this )>::type, \
1130+ GPU_DEVICE ErrorEbm InjectedApplyUpdate (ApplyUpdateBridge* const pData) const { \
1131+ return Objective::ChildApplyUpdate<typename std::remove_pointer<decltype (this )>::type, \
11161132 bCollapsed, \
11171133 bValidation, \
11181134 bWeight, \
0 commit comments