Skip to content

Commit 9e1df01

Browse files
authored
*shared* druntime: Avoid aborting with '_handleToDSO not in sync with _loadedDSOs' (#5048)
This error currently happens when a D DSO is unloaded in a thread the DSO wasn't registered with. For example: a D Python extension DLL is loaded via dlopen/LoadLibrary in a Python worker thread (dragging in the druntime and Phobos DLLs etc., and initializing those in the worker thread), and one of the D DLLs is then unloaded in the Python *main* thread at Python exit. druntime has no way of registering the unknown main thread (registering the OS-initialized TLS data with the D GC, running the TLS module ctors etc.). [Normally, if you spawn a new thread via druntime, that new thread inherits all the DSOs registered with the parent thread.]

What we currently do when a DSO is unloaded in such a thread is to still run the TLS module dtors (as well as the shared module dtors etc.) of that DSO, but then print that 'not in sync' message and abort the process, so any remaining DSOs are not finalized.

Try to handle such scenarios more gracefully by:

* emitting a stderr warning (per unloaded DSO) instead of aborting (and so no longer skipping any remaining DSOs, nor tampering with the exit code),
* emitting the warning *before* the DSO finalization, so that any issues there are easier to troubleshoot, and
* excluding the TLS module dtors for such not-registered-in-current-thread DSOs, as the TLS module ctors most likely haven't run in that thread either.
1 parent 7222867 commit 9e1df01

1 file changed

Lines changed: 45 additions & 24 deletions

File tree

runtime/druntime/src/rt/sections_elf_shared.d

Lines changed: 45 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -645,54 +645,73 @@ package extern(C) void _d_dso_registry(void* arg)
645645
if (doFinalize)
646646
doFinalize = !gc_isProxied();
647647
}
648-
if (doFinalize)
649-
{
650-
// rt_unloadLibrary already ran tls dtors, so do this only for dlclose
651-
immutable runTlsDtors = !_rtLoading;
652-
runModuleDestructors(pdso, runTlsDtors);
653-
unregisterGCRanges(pdso);
654-
// run finalizers after module dtors (same order as in rt_term)
655-
version (Shared) runFinalizers(pdso);
656-
}
657648

649+
bool runTlsDtors = true;
658650
version (Shared)
659651
{
660-
if (!_rtLoading)
652+
size_t loadedDSOIndex = size_t.max;
653+
654+
if (_rtLoading)
655+
{
656+
// This DSO is being unloaded via rt_unloadLibrary; the TLS
657+
// module dtors already ran, and it was removed from _loadedDSOs.
658+
runTlsDtors = false;
659+
}
660+
else // dlclose
661661
{
662-
/* This DSO was not unloaded by rt_unloadLibrary so we
663-
* have to remove it from _loadedDSOs here.
664-
*/
665662
foreach (i, ref tdso; _loadedDSOs)
666663
{
667664
if (tdso._pdso == pdso)
668665
{
669-
_loadedDSOs.remove(i);
666+
loadedDSOIndex = i;
670667
break;
671668
}
672669
}
670+
671+
if (loadedDSOIndex == size_t.max)
672+
{
673+
import core.stdc.stdio : fprintf, stderr;
674+
fprintf(stderr, "druntime warning: DSO being unloaded isn't in thread-local DSO list. Terminating/unloading in a thread not attached to druntime?\n");
675+
runTlsDtors = false;
676+
}
673677
}
678+
}
674679

675-
unsetDSOForHandle(pdso, pdso._handle);
680+
if (doFinalize)
681+
{
682+
runModuleDestructors(pdso, runTlsDtors);
683+
unregisterGCRanges(pdso);
684+
// run finalizers after module dtors (same order as in rt_term)
685+
version (Shared) runFinalizers(pdso);
686+
}
687+
688+
version (Shared)
689+
{
690+
if (loadedDSOIndex != size_t.max)
691+
{
692+
safeAssert(_loadedDSOs[loadedDSOIndex]._pdso == pdso, "Stale loadedDSOIndex.");
693+
_loadedDSOs.remove(loadedDSOIndex);
694+
}
695+
696+
const numGlobalDSOs = unsetDSOForHandle(pdso, pdso._handle);
697+
safeAssert(_loadedDSOs.length <= numGlobalDSOs, "Thread-local DSO list with more entries than thread-global one.");
698+
const isLastDSO = numGlobalDSOs == 0;
699+
if (isLastDSO)
700+
_handleToDSO.reset();
676701
}
677702
else
678703
{
679704
// static DSOs are unloaded in reverse order
680705
safeAssert(pdso == _loadedDSOs.back, "DSO being unregistered isn't current last one.");
681706
_loadedDSOs.popBack();
707+
const isLastDSO = _loadedDSOs.empty;
682708
}
683709

684710
freeDSO(pdso);
685711

686712
// last DSO being unloaded => shutdown registry
687-
if (_loadedDSOs.empty)
688-
{
689-
version (Shared)
690-
{
691-
safeAssert(_handleToDSO.empty, "_handleToDSO not in sync with _loadedDSOs.");
692-
_handleToDSO.reset();
693-
}
713+
if (isLastDSO)
694714
finiLocks();
695-
}
696715
}
697716
}
698717

@@ -898,12 +917,14 @@ version (Shared)
898917
_handleToDSOMutex.unlock_nothrow();
899918
}
900919

901-
void unsetDSOForHandle(DSO* pdso, void* handle)
920+
size_t unsetDSOForHandle(DSO* pdso, void* handle)
902921
{
903922
_handleToDSOMutex.lock_nothrow();
904923
safeAssert(_handleToDSO[handle] == pdso, "Handle doesn't match registered DSO.");
905924
_handleToDSO.remove(handle);
925+
const numDSOs = _handleToDSO.length;
906926
_handleToDSOMutex.unlock_nothrow();
927+
return numDSOs;
907928
}
908929

909930
static if (SharedELF) void getDependencies(const scope ref SharedObject object, ref Array!(DSO*) deps)

0 commit comments

Comments
 (0)