GH-83065: Fix import deadlock by implementing hierarchical module locking (GH-137196)

Make _find_and_load() acquire the module locks for the full
dotted-name chain (parent before child) when loading a nested module, so
both threads contend on the same first lock and serialise instead of
deadlocking.

When acquiring a parent's lock would itself deadlock with another thread
that is loading that parent (cross-package circular imports), the parent's
lock is skipped and the partially-initialised parent is accepted -- the
same policy _lock_unlock_module() already applies on the existing code
path -- so concurrent circular imports that worked before continue to work.
This commit is contained in:
Gregory P. Smith
2026-04-28 01:06:23 -07:00
committed by GitHub
parent 29251396a9
commit 9a57179d74
4 changed files with 216 additions and 1 deletions
+7
View File
@@ -677,6 +677,13 @@ Other language changes
the existing support for unary minus.
(Contributed by Bartosz Sławecki in :gh:`145239`.)
* The import system now acquires per-module locks in hierarchical order
(parent packages before their submodules). This fixes a long-standing
deadlock where one thread importing ``pkg.sub`` and another importing
``pkg.sub.mod`` could each block the other when ``pkg/sub/__init__.py``
imports ``pkg.sub.mod``.
(Contributed by Gregory P. Smith in :gh:`83065`.)
New modules
===========
+65 -1
View File
@@ -424,6 +424,64 @@ class _ModuleLockManager:
self._lock.release()
def _get_module_chain(name):
"""Return the chain of dotted-name prefixes from root to leaf.
For example: 'a.b.c' -> ['a', 'a.b', 'a.b.c']
"""
parts = name.split('.')
return ['.'.join(parts[:i+1]) for i in range(len(parts))]
class _HierarchicalLockManager:
"""Manages acquisition of multiple module locks in hierarchical order.
This prevents deadlocks by ensuring all threads acquire locks in the
same order (parent modules before child modules).
"""
def __init__(self, name):
self._name = name
self._module_chain = _get_module_chain(name)
self._locks = []
def __enter__(self):
try:
for module_name in self._module_chain:
# Only acquire lock if module is not already fully loaded
module = sys.modules.get(module_name)
if (module is None or
getattr(getattr(module, "__spec__", None),
"_initializing", False)):
lock = _get_module_lock(module_name)
try:
lock.acquire()
except _DeadlockError:
if module_name == self._name:
raise
# The parent is being initialised by a thread that
# is (transitively) waiting on a lock we hold.
# Apply the same policy as _lock_unlock_module():
# accept a partially-initialised parent for circular
# imports rather than failing the whole chain.
continue
self._locks.append((module_name, lock))
except:
# __exit__ is not called when __enter__ raises (e.g. _DeadlockError
# on the leaf lock, or KeyboardInterrupt), so release whatever we
# already hold to avoid permanently leaking held module locks.
for module_name, lock in reversed(self._locks):
lock.release()
self._locks.clear()
raise
return self
def __exit__(self, *args, **kwargs):
for module_name, lock in reversed(self._locks):
lock.release()
self._locks.clear()
# The following two functions are for consumption by Python/import.c.
def _get_module_lock(name):
@@ -1276,7 +1334,13 @@ def _find_and_load(name, import_):
module = sys.modules.get(name, _NEEDS_LOADING)
if (module is _NEEDS_LOADING or
getattr(getattr(module, "__spec__", None), "_initializing", False)):
with _ModuleLockManager(name):
if '.' in name:
lock_manager = _HierarchicalLockManager(name)
else:
lock_manager = _ModuleLockManager(name)
with lock_manager:
module = sys.modules.get(name, _NEEDS_LOADING)
if module is _NEEDS_LOADING:
return _find_and_load_unlocked(name, import_)
@@ -324,6 +324,143 @@ raise RuntimeError("Intentional import failure")
# Neither thread should have errors about stale modules
self.assertEqual(errors, [], f"Race condition detected: {errors}")
def test_hierarchical_import_deadlock(self):
# Regression test for bpo-38884 / gh-83065
# Tests that concurrent imports at different hierarchy levels
# don't deadlock when parent imports child in __init__.py
# Create package structure:
# package/__init__.py: from package import subpackage
# package/subpackage/__init__.py: from package.subpackage.module import *
# package/subpackage/module.py: class SomeClass: pass
pkg_dir = os.path.join(TESTFN, 'hier_deadlock_pkg')
os.makedirs(pkg_dir)
self.addCleanup(shutil.rmtree, TESTFN)
subpkg_dir = os.path.join(pkg_dir, 'subpackage')
os.makedirs(subpkg_dir)
with open(os.path.join(pkg_dir, "__init__.py"), "w") as f:
f.write("from hier_deadlock_pkg import subpackage\n")
with open(os.path.join(subpkg_dir, "__init__.py"), "w") as f:
f.write("from hier_deadlock_pkg.subpackage.module import *\n")
with open(os.path.join(subpkg_dir, "module.py"), "w") as f:
f.write("class SomeClass:\n pass\n")
sys.path.insert(0, TESTFN)
self.addCleanup(sys.path.remove, TESTFN)
self.addCleanup(forget, 'hier_deadlock_pkg')
self.addCleanup(forget, 'hier_deadlock_pkg.subpackage')
self.addCleanup(forget, 'hier_deadlock_pkg.subpackage.module')
importlib.invalidate_caches()
errors = []
results = []
barrier = threading.Barrier(2)
def t1():
barrier.wait()
try:
import hier_deadlock_pkg.subpackage
results.append('t1_success')
except Exception as e:
errors.append(('t1', type(e).__name__, str(e)))
def t2():
barrier.wait()
try:
import hier_deadlock_pkg.subpackage.module
results.append('t2_success')
except Exception as e:
errors.append(('t2', type(e).__name__, str(e)))
# Run multiple times to increase chance of hitting race condition
for i in range(10):
for mod in ['hier_deadlock_pkg', 'hier_deadlock_pkg.subpackage',
'hier_deadlock_pkg.subpackage.module']:
sys.modules.pop(mod, None)
errors.clear()
results.clear()
barrier.reset()
thread1 = threading.Thread(target=t1)
thread2 = threading.Thread(target=t2)
thread1.start()
thread2.start()
thread1.join(timeout=5)
thread2.join(timeout=5)
if thread1.is_alive() or thread2.is_alive():
self.fail(f"Threads deadlocked on iteration {i}")
self.assertEqual(
errors, [],
f"Import(s) failed on iteration {i}: {errors}")
self.assertEqual(
sorted(results), ['t1_success', 't2_success'],
f"Not all imports succeeded on iteration {i}: {results}")
def test_cross_package_circular_import(self):
# Two packages whose __init__.py each import a submodule of the
# other. Concurrent imports of submodules of each must not raise
# _DeadlockError; the import system accepts a partially-initialised
# parent in this case (see _lock_unlock_module).
os.makedirs(os.path.join(TESTFN, "circ_a"))
os.makedirs(os.path.join(TESTFN, "circ_b"))
self.addCleanup(shutil.rmtree, TESTFN)
with open(os.path.join(TESTFN, "circ_a", "__init__.py"), "w") as f:
f.write("import time; time.sleep(0.03)\nimport circ_b.other\n")
with open(os.path.join(TESTFN, "circ_b", "__init__.py"), "w") as f:
f.write("import time; time.sleep(0.03)\nimport circ_a.other\n")
for pkg in ("circ_a", "circ_b"):
for mod in ("sub.py", "other.py"):
with open(os.path.join(TESTFN, pkg, mod), "w") as f:
f.write("X = 1\n")
sys.path.insert(0, TESTFN)
self.addCleanup(sys.path.remove, TESTFN)
for mod in ("circ_a", "circ_a.sub", "circ_a.other",
"circ_b", "circ_b.sub", "circ_b.other"):
self.addCleanup(forget, mod)
importlib.invalidate_caches()
errors = []
barrier = threading.Barrier(2)
def do_import(name):
barrier.wait()
try:
importlib.import_module(name)
except Exception as e:
errors.append((name, type(e).__name__, str(e)))
for i in range(10):
for mod in ("circ_a", "circ_a.sub", "circ_a.other",
"circ_b", "circ_b.sub", "circ_b.other"):
sys.modules.pop(mod, None)
errors.clear()
barrier.reset()
thread1 = threading.Thread(target=do_import, args=("circ_a.sub",))
thread2 = threading.Thread(target=do_import, args=("circ_b.sub",))
thread1.start()
thread2.start()
thread1.join(timeout=5)
thread2.join(timeout=5)
if thread1.is_alive() or thread2.is_alive():
self.fail(f"Threads deadlocked on iteration {i}")
self.assertEqual(
errors, [],
f"Import(s) failed on iteration {i}: {errors}")
def setUpModule():
thread_info = threading_helper.threading_setup()
@@ -0,0 +1,7 @@
Fix a deadlock that could occur when one thread is importing a submodule
(for example ``import pkg.sub.mod``) while another thread is importing one
of its parent packages (for example ``import pkg.sub``) and that parent's
``__init__.py`` itself imports the submodule. The import system now
acquires module locks in hierarchical (parent-before-child) order so the
two threads serialise instead of raising
``_DeadlockError``.