mirror of
https://github.com/python/cpython.git
synced 2026-05-06 12:49:07 -04:00
GH-83065: Fix import deadlock by implementing hierarchical module locking (GH-137196)
Make _find_and_load() acquire the module locks for the full dotted-name chain (parent before child) when loading a nested module, so both threads contend on the same first lock and serialise instead of deadlocking. When acquiring a parent's lock would itself deadlock with another thread that is loading that parent (cross-package circular imports), the parent's lock is skipped and the partially-initialised parent is accepted -- the same policy _lock_unlock_module() already applies on the existing code path -- so concurrent circular imports that worked before continue to work.
This commit is contained in:
@@ -677,6 +677,13 @@ Other language changes
|
||||
the existing support for unary minus.
|
||||
(Contributed by Bartosz Sławecki in :gh:`145239`.)
|
||||
|
||||
* The import system now acquires per-module locks in hierarchical order
|
||||
(parent packages before their submodules). This fixes a long-standing
|
||||
deadlock where one thread importing ``pkg.sub`` and another importing
|
||||
``pkg.sub.mod`` could each block the other when ``pkg/sub/__init__.py``
|
||||
imports ``pkg.sub.mod``.
|
||||
(Contributed by Gregory P. Smith in :gh:`83065`.)
|
||||
|
||||
|
||||
New modules
|
||||
===========
|
||||
|
||||
@@ -424,6 +424,64 @@ class _ModuleLockManager:
|
||||
self._lock.release()
|
||||
|
||||
|
||||
def _get_module_chain(name):
|
||||
"""Return the chain of dotted-name prefixes from root to leaf.
|
||||
|
||||
For example: 'a.b.c' -> ['a', 'a.b', 'a.b.c']
|
||||
"""
|
||||
parts = name.split('.')
|
||||
return ['.'.join(parts[:i+1]) for i in range(len(parts))]
|
||||
|
||||
|
||||
class _HierarchicalLockManager:
|
||||
"""Manages acquisition of multiple module locks in hierarchical order.
|
||||
|
||||
This prevents deadlocks by ensuring all threads acquire locks in the
|
||||
same order (parent modules before child modules).
|
||||
"""
|
||||
|
||||
def __init__(self, name):
|
||||
self._name = name
|
||||
self._module_chain = _get_module_chain(name)
|
||||
self._locks = []
|
||||
|
||||
def __enter__(self):
|
||||
try:
|
||||
for module_name in self._module_chain:
|
||||
# Only acquire lock if module is not already fully loaded
|
||||
module = sys.modules.get(module_name)
|
||||
if (module is None or
|
||||
getattr(getattr(module, "__spec__", None),
|
||||
"_initializing", False)):
|
||||
lock = _get_module_lock(module_name)
|
||||
try:
|
||||
lock.acquire()
|
||||
except _DeadlockError:
|
||||
if module_name == self._name:
|
||||
raise
|
||||
# The parent is being initialised by a thread that
|
||||
# is (transitively) waiting on a lock we hold.
|
||||
# Apply the same policy as _lock_unlock_module():
|
||||
# accept a partially-initialised parent for circular
|
||||
# imports rather than failing the whole chain.
|
||||
continue
|
||||
self._locks.append((module_name, lock))
|
||||
except:
|
||||
# __exit__ is not called when __enter__ raises (e.g. _DeadlockError
|
||||
# on the leaf lock, or KeyboardInterrupt), so release whatever we
|
||||
# already hold to avoid permanently leaking held module locks.
|
||||
for module_name, lock in reversed(self._locks):
|
||||
lock.release()
|
||||
self._locks.clear()
|
||||
raise
|
||||
return self
|
||||
|
||||
def __exit__(self, *args, **kwargs):
|
||||
for module_name, lock in reversed(self._locks):
|
||||
lock.release()
|
||||
self._locks.clear()
|
||||
|
||||
|
||||
# The following two functions are for consumption by Python/import.c.
|
||||
|
||||
def _get_module_lock(name):
|
||||
@@ -1276,7 +1334,13 @@ def _find_and_load(name, import_):
|
||||
module = sys.modules.get(name, _NEEDS_LOADING)
|
||||
if (module is _NEEDS_LOADING or
|
||||
getattr(getattr(module, "__spec__", None), "_initializing", False)):
|
||||
with _ModuleLockManager(name):
|
||||
|
||||
if '.' in name:
|
||||
lock_manager = _HierarchicalLockManager(name)
|
||||
else:
|
||||
lock_manager = _ModuleLockManager(name)
|
||||
|
||||
with lock_manager:
|
||||
module = sys.modules.get(name, _NEEDS_LOADING)
|
||||
if module is _NEEDS_LOADING:
|
||||
return _find_and_load_unlocked(name, import_)
|
||||
|
||||
@@ -324,6 +324,143 @@ raise RuntimeError("Intentional import failure")
|
||||
# Neither thread should have errors about stale modules
|
||||
self.assertEqual(errors, [], f"Race condition detected: {errors}")
|
||||
|
||||
def test_hierarchical_import_deadlock(self):
|
||||
# Regression test for bpo-38884 / gh-83065
|
||||
# Tests that concurrent imports at different hierarchy levels
|
||||
# don't deadlock when parent imports child in __init__.py
|
||||
|
||||
# Create package structure:
|
||||
# package/__init__.py: from package import subpackage
|
||||
# package/subpackage/__init__.py: from package.subpackage.module import *
|
||||
# package/subpackage/module.py: class SomeClass: pass
|
||||
|
||||
pkg_dir = os.path.join(TESTFN, 'hier_deadlock_pkg')
|
||||
os.makedirs(pkg_dir)
|
||||
self.addCleanup(shutil.rmtree, TESTFN)
|
||||
|
||||
subpkg_dir = os.path.join(pkg_dir, 'subpackage')
|
||||
os.makedirs(subpkg_dir)
|
||||
|
||||
with open(os.path.join(pkg_dir, "__init__.py"), "w") as f:
|
||||
f.write("from hier_deadlock_pkg import subpackage\n")
|
||||
|
||||
with open(os.path.join(subpkg_dir, "__init__.py"), "w") as f:
|
||||
f.write("from hier_deadlock_pkg.subpackage.module import *\n")
|
||||
|
||||
with open(os.path.join(subpkg_dir, "module.py"), "w") as f:
|
||||
f.write("class SomeClass:\n pass\n")
|
||||
|
||||
sys.path.insert(0, TESTFN)
|
||||
self.addCleanup(sys.path.remove, TESTFN)
|
||||
self.addCleanup(forget, 'hier_deadlock_pkg')
|
||||
self.addCleanup(forget, 'hier_deadlock_pkg.subpackage')
|
||||
self.addCleanup(forget, 'hier_deadlock_pkg.subpackage.module')
|
||||
|
||||
importlib.invalidate_caches()
|
||||
|
||||
errors = []
|
||||
results = []
|
||||
barrier = threading.Barrier(2)
|
||||
|
||||
def t1():
|
||||
barrier.wait()
|
||||
try:
|
||||
import hier_deadlock_pkg.subpackage
|
||||
results.append('t1_success')
|
||||
except Exception as e:
|
||||
errors.append(('t1', type(e).__name__, str(e)))
|
||||
|
||||
def t2():
|
||||
barrier.wait()
|
||||
try:
|
||||
import hier_deadlock_pkg.subpackage.module
|
||||
results.append('t2_success')
|
||||
except Exception as e:
|
||||
errors.append(('t2', type(e).__name__, str(e)))
|
||||
|
||||
# Run multiple times to increase chance of hitting race condition
|
||||
for i in range(10):
|
||||
for mod in ['hier_deadlock_pkg', 'hier_deadlock_pkg.subpackage',
|
||||
'hier_deadlock_pkg.subpackage.module']:
|
||||
sys.modules.pop(mod, None)
|
||||
|
||||
errors.clear()
|
||||
results.clear()
|
||||
barrier.reset()
|
||||
|
||||
thread1 = threading.Thread(target=t1)
|
||||
thread2 = threading.Thread(target=t2)
|
||||
|
||||
thread1.start()
|
||||
thread2.start()
|
||||
|
||||
thread1.join(timeout=5)
|
||||
thread2.join(timeout=5)
|
||||
|
||||
if thread1.is_alive() or thread2.is_alive():
|
||||
self.fail(f"Threads deadlocked on iteration {i}")
|
||||
|
||||
self.assertEqual(
|
||||
errors, [],
|
||||
f"Import(s) failed on iteration {i}: {errors}")
|
||||
self.assertEqual(
|
||||
sorted(results), ['t1_success', 't2_success'],
|
||||
f"Not all imports succeeded on iteration {i}: {results}")
|
||||
|
||||
def test_cross_package_circular_import(self):
|
||||
# Two packages whose __init__.py each import a submodule of the
|
||||
# other. Concurrent imports of submodules of each must not raise
|
||||
# _DeadlockError; the import system accepts a partially-initialised
|
||||
# parent in this case (see _lock_unlock_module).
|
||||
os.makedirs(os.path.join(TESTFN, "circ_a"))
|
||||
os.makedirs(os.path.join(TESTFN, "circ_b"))
|
||||
self.addCleanup(shutil.rmtree, TESTFN)
|
||||
with open(os.path.join(TESTFN, "circ_a", "__init__.py"), "w") as f:
|
||||
f.write("import time; time.sleep(0.03)\nimport circ_b.other\n")
|
||||
with open(os.path.join(TESTFN, "circ_b", "__init__.py"), "w") as f:
|
||||
f.write("import time; time.sleep(0.03)\nimport circ_a.other\n")
|
||||
for pkg in ("circ_a", "circ_b"):
|
||||
for mod in ("sub.py", "other.py"):
|
||||
with open(os.path.join(TESTFN, pkg, mod), "w") as f:
|
||||
f.write("X = 1\n")
|
||||
|
||||
sys.path.insert(0, TESTFN)
|
||||
self.addCleanup(sys.path.remove, TESTFN)
|
||||
for mod in ("circ_a", "circ_a.sub", "circ_a.other",
|
||||
"circ_b", "circ_b.sub", "circ_b.other"):
|
||||
self.addCleanup(forget, mod)
|
||||
importlib.invalidate_caches()
|
||||
|
||||
errors = []
|
||||
barrier = threading.Barrier(2)
|
||||
|
||||
def do_import(name):
|
||||
barrier.wait()
|
||||
try:
|
||||
importlib.import_module(name)
|
||||
except Exception as e:
|
||||
errors.append((name, type(e).__name__, str(e)))
|
||||
|
||||
for i in range(10):
|
||||
for mod in ("circ_a", "circ_a.sub", "circ_a.other",
|
||||
"circ_b", "circ_b.sub", "circ_b.other"):
|
||||
sys.modules.pop(mod, None)
|
||||
errors.clear()
|
||||
barrier.reset()
|
||||
|
||||
thread1 = threading.Thread(target=do_import, args=("circ_a.sub",))
|
||||
thread2 = threading.Thread(target=do_import, args=("circ_b.sub",))
|
||||
thread1.start()
|
||||
thread2.start()
|
||||
thread1.join(timeout=5)
|
||||
thread2.join(timeout=5)
|
||||
|
||||
if thread1.is_alive() or thread2.is_alive():
|
||||
self.fail(f"Threads deadlocked on iteration {i}")
|
||||
self.assertEqual(
|
||||
errors, [],
|
||||
f"Import(s) failed on iteration {i}: {errors}")
|
||||
|
||||
|
||||
def setUpModule():
|
||||
thread_info = threading_helper.threading_setup()
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
Fix a deadlock that could occur when one thread is importing a submodule
|
||||
(for example ``import pkg.sub.mod``) while another thread is importing one
|
||||
of its parent packages (for example ``import pkg.sub``) and that parent's
|
||||
``__init__.py`` itself imports the submodule. The import system now
|
||||
acquires module locks in hierarchical (parent-before-child) order so the
|
||||
two threads serialise instead of raising
|
||||
``_DeadlockError``.
|
||||
Reference in New Issue
Block a user