@@ -27,6 +27,141 @@ namespace ze_lib
2727 }
2828 }
2929 bool delayContextDestruction = false ;
// Result codes published by the stability worker through sharedResult.
#define ZEL_STABILITY_CHECK_RESULT_SUCCESS 0
#define ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_NULL 1
#define ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_FAILED 2
#define ZEL_STABILITY_CHECK_RESULT_EXCEPTION 3
// Signal value that asks the worker to leave its loop.
#define ZEL_STABILITY_THREAD_EXIT 4
// Time, in milliseconds, a caller waits for the worker to publish a result.
#define ZEL_STABILITY_THREAD_TIMEOUT 5
// Signal value that asks the worker to run one stability check.
#define ZEL_STABILITY_THREAD_SIGNAL 1

// Request channel (caller -> worker). Allocated during loader initialisation
// and released by the context destructor; null outside that window.
std::promise<int> *sharedSignal = nullptr;
std::shared_future<int> *sharedFuture = nullptr;
// Result channel (worker -> caller), with the same lifetime as above.
std::promise<int> *sharedResult = nullptr;
std::shared_future<int> *sharedResultFuture = nullptr;

/**
 * @brief Owns the long-lived stability-check worker thread.
 *
 * The callable passed to the constructor runs on a new thread. Any exception
 * that escapes it is swallowed and recorded, so it never reaches
 * std::terminate; has_crashed() reports whether that happened (or whether a
 * later signal/join attempt failed).
 */
class StabilityThreadClass {
public:
    /**
     * @brief Starts the worker thread running @p func.
     * @param func Thread body; it may throw, in which case the instance is
     *             marked as crashed.
     */
    StabilityThreadClass(std::function<void()> func) :
        stabilityThread([this, func]() {
            try {
                func();
                crashed_.store(false);
            } catch (...) {
                crashed_.store(true);
            }
        }) {}

    // The worker lambda captures `this`, so the object must stay where it is.
    StabilityThreadClass(const StabilityThreadClass &) = delete;
    StabilityThreadClass &operator=(const StabilityThreadClass &) = delete;

    /**
     * @brief Asks the worker to exit and always joins it.
     *
     * A joinable std::thread must be joined before it is destroyed, even
     * when the worker has already finished or crashed; otherwise the
     * std::thread destructor calls std::terminate.
     */
    ~StabilityThreadClass() {
        if (!stabilityThread.joinable()) {
            return;
        }
        try {
            // Best effort: wake a worker that is still waiting for a signal.
            // This throws std::future_error when a value is already pending,
            // which is harmless for a worker that has finished.
            if (sharedSignal) {
                sharedSignal->set_value(ZEL_STABILITY_THREAD_EXIT);
            }
        } catch (...) {
            crashed_.store(true);
        }
        try {
            stabilityThread.join();
        } catch (...) {
            crashed_.store(true);
        }
    }

    /** @brief True once the worker body threw or a signal/join attempt failed. */
    bool has_crashed() const {
        return crashed_.load();
    }

    /**
     * @brief Posts @p signalValue on the shared signal promise.
     *
     * Does nothing when the thread is not joinable, the worker is marked as
     * crashed, or the signal promise has not been allocated. A failure to
     * post the value marks the worker as crashed instead of throwing.
     *
     * @param signalValue ZEL_STABILITY_THREAD_SIGNAL or ZEL_STABILITY_THREAD_EXIT.
     */
    void signal(int signalValue) {
        try {
            if (stabilityThread.joinable() && !has_crashed() && sharedSignal) {
                sharedSignal->set_value(signalValue);
            }
        } catch (...) {
            crashed_.store(true);
        }
    }

    /** @brief Joins the worker if it is still joinable; never throws. */
    void join() {
        try {
            if (stabilityThread.joinable()) {
                stabilityThread.join();
            }
        } catch (...) {
            crashed_.store(true);
        }
    }

private:
    // Declared before the thread on purpose: members are initialised in
    // declaration order, and the worker may write this flag as soon as the
    // thread starts.
    std::atomic<bool> crashed_{false};
    std::thread stabilityThread;
};
StabilityThreadClass *l0StabilityThread = nullptr;
94+
95+ /* *
96+ * @brief Performs a stability check for the Level Zero loader.
97+ *
98+ * This function checks the stability of the Level Zero loader by verifying
99+ * the presence of the loader module, the validity of the `zeDriverGet` function
100+ * pointer, and the ability to retrieve driver information. The result of the
101+ * stability check is communicated through the provided promise.
102+ *
103+ * @param stabilityPromise A promise object used to communicate the result of
104+ * the stability check. The promise is set with one of
105+ * the following values:
106+ * - ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_NULL: The
107+ * `zeDriverGet` function pointer is invalid.
108+ * - ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_FAILED: The
109+ * loader failed to retrieve driver information.
110+ * - ZEL_STABILITY_CHECK_RESULT_EXCEPTION: An
111+ * exception occurred during the stability check.
112+ * - ZEL_STABILITY_CHECK_RESULT_SUCCESS: The stability
113+ * check was successful.
114+ *
115+ * @note If debug tracing is enabled, debug messages are logged for each failure
116+ * scenario.
117+ * @note If the Loader is completely torn down, this thread is expected to be killed
118+ * due to invalid memory access and the stability check will determine a failure.
119+ *
120+ * @exception This function catches all exceptions internally and does not throw.
121+ */
122+ void stabilityCheck () {
123+ // Wait for the user to set a value (signal) before proceeding with the stability check
124+ while (true ) {
125+ if (!ze_lib::context) {
126+ return ;
127+ }
128+ try {
129+ ze_lib::sharedFuture->wait ();
130+ auto signalValue = ze_lib::sharedFuture->get ();
131+ if (signalValue == ZEL_STABILITY_THREAD_EXIT) {
132+ ze_lib::sharedResult->set_value (ZEL_STABILITY_CHECK_RESULT_SUCCESS);
133+ return ;
134+ }
135+
136+ if (!ze_lib::context->loaderDriverGet ) {
137+ if (ze_lib::context->debugTraceEnabled ) {
138+ std::string message = " LoaderDriverGet is a bad pointer. Exiting stability checker thread." ;
139+ ze_lib::context->debug_trace_message (message, " " );
140+ }
141+ ze_lib::sharedResult->set_value (ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_NULL);
142+ return ;
143+ }
144+
145+ uint32_t driverCount = 0 ;
146+ ze_result_t result = ZE_RESULT_ERROR_UNINITIALIZED;
147+ result = ze_lib::context->loaderDriverGet (&driverCount, nullptr );
148+ if (result != ZE_RESULT_SUCCESS || driverCount == 0 ) {
149+ if (ze_lib::context->debugTraceEnabled ) {
150+ std::string message = " Loader stability check failed. Exiting stability checker thread." ;
151+ ze_lib::context->debug_trace_message (message, " " );
152+ }
153+ ze_lib::sharedResult->set_value (ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_FAILED);
154+ return ;
155+ }
156+ ze_lib::sharedResult->set_value (ZEL_STABILITY_CHECK_RESULT_SUCCESS);
157+ *ze_lib::sharedSignal = std::promise<int >();
158+ *ze_lib::sharedFuture = ze_lib::sharedSignal->get_future ().share ();
159+ } catch (...) {
160+ ze_lib::sharedResult->set_value (ZEL_STABILITY_CHECK_RESULT_EXCEPTION);
161+ return ;
162+ }
163+ }
164+ }
30165 #endif
31166 bool destruction = false ;
32167
@@ -43,6 +178,24 @@ namespace ze_lib
43178 if (loader) {
44179 FREE_DRIVER_LIBRARY ( loader );
45180 }
181+ delete l0StabilityThread;
182+ l0StabilityThread = nullptr ;
183+ if (sharedSignal) {
184+ delete sharedSignal;
185+ sharedSignal = nullptr ;
186+ }
187+ if (sharedFuture) {
188+ delete sharedFuture;
189+ sharedFuture = nullptr ;
190+ }
191+ if (sharedResultFuture) {
192+ delete sharedResultFuture;
193+ sharedResultFuture = nullptr ;
194+ }
195+ if (sharedResult) {
196+ delete sharedResult;
197+ sharedResult = nullptr ;
198+ }
46199#endif
47200 ze_lib::destruction = true ;
48201 };
@@ -149,6 +302,13 @@ namespace ze_lib
149302 std::string version_message = " Loader API Version to be requested is v" + std::to_string (ZE_MAJOR_VERSION (version)) + " ." + std::to_string (ZE_MINOR_VERSION (version));
150303 debug_trace_message (version_message, " " );
151304 loaderDriverGet = reinterpret_cast <ze_pfnDriverGet_t>(GET_FUNCTION_PTR (loader, " zeDriverGet" ));
305+ ze_lib::sharedSignal = new std::promise<int >();
306+ ze_lib::sharedFuture = new std::shared_future<int >();
307+ *ze_lib::sharedFuture = ze_lib::sharedSignal->get_future ().share ();
308+ ze_lib::sharedResult = new std::promise<int >();
309+ ze_lib::sharedResultFuture = new std::shared_future<int >();
310+ *ze_lib::sharedResultFuture = ze_lib::sharedResult->get_future ().share ();
311+ ze_lib::l0StabilityThread = new StabilityThreadClass (stabilityCheck);
152312#else
153313 result = zeLoaderInit ();
154314 if ( ZE_RESULT_SUCCESS == result ) {
@@ -405,70 +565,6 @@ zelSetDelayLoaderContextTeardown()
405565 #endif
406566}
407567
408- #ifdef DYNAMIC_LOAD_LOADER
409- #define ZEL_STABILITY_CHECK_RESULT_SUCCESS 0
410- #define ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_NULL 1
411- #define ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_FAILED 2
412- #define ZEL_STABILITY_CHECK_RESULT_EXCEPTION 3
413-
414- /* *
415- * @brief Performs a stability check for the Level Zero loader.
416- *
417- * This function checks the stability of the Level Zero loader by verifying
418- * the presence of the loader module, the validity of the `zeDriverGet` function
419- * pointer, and the ability to retrieve driver information. The result of the
420- * stability check is communicated through the provided promise.
421- *
422- * @param stabilityPromise A promise object used to communicate the result of
423- * the stability check. The promise is set with one of
424- * the following values:
425- * - ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_NULL: The
426- * `zeDriverGet` function pointer is invalid.
427- * - ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_FAILED: The
428- * loader failed to retrieve driver information.
429- * - ZEL_STABILITY_CHECK_RESULT_EXCEPTION: An
430- * exception occurred during the stability check.
431- * - ZEL_STABILITY_CHECK_RESULT_SUCCESS: The stability
432- * check was successful.
433- *
434- * @note If debug tracing is enabled, debug messages are logged for each failure
435- * scenario.
436- * @note If the Loader is completely torn down, this thread is expected to be killed
437- * due to invalid memory access and the stability check will determine a failure.
438- *
439- * @exception This function catches all exceptions internally and does not throw.
440- */
441- void stabilityCheck (std::promise<int > stabilityPromise) {
442- try {
443- if (!ze_lib::context->loaderDriverGet ) {
444- if (ze_lib::context->debugTraceEnabled ) {
445- std::string message = " LoaderDriverGet is a bad pointer. Exiting stability checker thread." ;
446- ze_lib::context->debug_trace_message (message, " " );
447- }
448- stabilityPromise.set_value (ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_NULL);
449- return ;
450- }
451-
452- uint32_t driverCount = 0 ;
453- ze_result_t result = ZE_RESULT_ERROR_UNINITIALIZED;
454- result = ze_lib::context->loaderDriverGet (&driverCount, nullptr );
455- if (result != ZE_RESULT_SUCCESS || driverCount == 0 ) {
456- if (ze_lib::context->debugTraceEnabled ) {
457- std::string message = " Loader stability check failed. Exiting stability checker thread." ;
458- ze_lib::context->debug_trace_message (message, " " );
459- }
460- stabilityPromise.set_value (ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_FAILED);
461- return ;
462- }
463- stabilityPromise.set_value (ZEL_STABILITY_CHECK_RESULT_SUCCESS);
464- return ;
465- } catch (...) {
466- stabilityPromise.set_value (ZEL_STABILITY_CHECK_RESULT_EXCEPTION);
467- return ;
468- }
469- }
470- #endif
471-
472568/* *
473569 * @brief Checks if the loader is in the process of tearing down.
474570 *
@@ -490,18 +586,36 @@ zelCheckIsLoaderInTearDown() {
490586 return true ;
491587 }
492588 #ifdef DYNAMIC_LOAD_LOADER
493- std::promise<int > stabilityPromise;
494- std::future<int > resultFuture = stabilityPromise.get_future ();
495589 int result = -1 ;
496- try {
497- // Launch the stability checker thread
498- std::thread stabilityThread (stabilityCheck, std::move (stabilityPromise));
499- result = resultFuture.get (); // Blocks until the result is available
590+ static bool failure = false ;
591+ if (failure) {
500592 if (ze_lib::context->debugTraceEnabled ) {
501- std::string message = " Stability checker thread completed with result: " + std::to_string (result) ;
593+ std::string message = " Stability checker thread failed already. " ;
502594 ze_lib::context->debug_trace_message (message, " " );
503595 }
504- stabilityThread.join ();
596+ return true ;
597+ }
598+ try {
599+ ze_lib::l0StabilityThread->signal (ZEL_STABILITY_THREAD_SIGNAL);
600+ if (ze_lib::sharedResultFuture->wait_for (std::chrono::milliseconds (ZEL_STABILITY_THREAD_TIMEOUT)) == std::future_status::timeout) {
601+ if (ze_lib::context->debugTraceEnabled ) {
602+ std::string message = " Stability checker thread timeout." ;
603+ ze_lib::context->debug_trace_message (message, " " );
604+ }
605+ result = ZEL_STABILITY_CHECK_RESULT_EXCEPTION;
606+ } else {
607+ if (!ze_lib::l0StabilityThread->has_crashed ()) {
608+ result = ze_lib::sharedResultFuture->get ();
609+ *ze_lib::sharedResult = std::promise<int >();
610+ *ze_lib::sharedResultFuture = ze_lib::sharedResult->get_future ().share ();
611+ } else {
612+ if (ze_lib::context->debugTraceEnabled ) {
613+ std::string message = " Stability checker thread crashed." ;
614+ ze_lib::context->debug_trace_message (message, " " );
615+ }
616+ result = ZEL_STABILITY_CHECK_RESULT_EXCEPTION;
617+ }
618+ }
505619 } catch (const std::exception& e) {
506620 if (ze_lib::context->debugTraceEnabled ) {
507621 std::string message = " Exception caught in parent thread: " + std::string (e.what ());
@@ -518,6 +632,7 @@ zelCheckIsLoaderInTearDown() {
518632 std::string message = " Loader stability check failed with result: " + std::to_string (result);
519633 ze_lib::context->debug_trace_message (message, " " );
520634 }
635+ failure = true ;
521636 return true ;
522637 }
523638 #endif
0 commit comments