[3.14] gh-144513: Skip critical section locking during stop-the-world (gh-144524) (#145570)

This commit is contained in:
Sam Gross
2026-03-06 12:00:17 -05:00
committed by GitHub
parent 89b69db17a
commit 7b9508f4b0
3 changed files with 103 additions and 1 deletions
@@ -0,0 +1,2 @@
Fix potential deadlock when using critical sections during stop-the-world
pauses in the free-threaded build.
@@ -4,6 +4,8 @@
#include "parts.h"
#include "pycore_critical_section.h"
#include "pycore_pystate.h"
#include "pycore_pythread.h"
#ifdef MS_WINDOWS
# include <windows.h> // Sleep()
@@ -381,6 +383,87 @@ test_critical_section2_reacquisition(PyObject *self, PyObject *Py_UNUSED(args))
#endif // Py_GIL_DISABLED
#ifdef Py_CAN_START_THREADS
// gh-144513: Test that critical sections don't deadlock with stop-the-world.
// This test is designed to deadlock (timeout) on builds without the fix.
struct test_data_stw {
PyObject *obj;
Py_ssize_t num_threads;
Py_ssize_t started;
PyEvent ready;
};
static void
thread_stw(void *arg)
{
struct test_data_stw *test_data = arg;
PyGILState_STATE gil = PyGILState_Ensure();
if (_Py_atomic_add_ssize(&test_data->started, 1) == test_data->num_threads - 1) {
_PyEvent_Notify(&test_data->ready);
}
// All threads: acquire critical section and hold it long enough to
// trigger TIME_TO_BE_FAIR_NS (1 ms), which causes direct handoff on unlock.
Py_BEGIN_CRITICAL_SECTION(test_data->obj);
pysleep(10); // 10 ms = 10 x TIME_TO_BE_FAIR_NS
Py_END_CRITICAL_SECTION();
PyGILState_Release(gil);
}
static PyObject *
test_critical_sections_stw(PyObject *self, PyObject *Py_UNUSED(args))
{
// gh-144513: Test that critical sections don't deadlock during STW.
//
// The deadlock occurs when lock ownership is handed off (due to fairness
// after TIME_TO_BE_FAIR_NS) to a thread that has already suspended for
// stop-the-world. The STW requester then cannot acquire the lock.
//
// With the fix, the STW requester detects world_stopped and skips locking.
#define STW_NUM_THREADS 2
struct test_data_stw test_data = {
.obj = PyDict_New(),
.num_threads = STW_NUM_THREADS,
};
if (test_data.obj == NULL) {
return NULL;
}
PyThread_handle_t handles[STW_NUM_THREADS];
PyThread_ident_t idents[STW_NUM_THREADS];
for (Py_ssize_t i = 0; i < STW_NUM_THREADS; i++) {
PyThread_start_joinable_thread(&thread_stw, &test_data,
&idents[i], &handles[i]);
}
// Wait for threads to start, then let them compete for the lock
PyEvent_Wait(&test_data.ready);
pysleep(5);
// Request stop-the-world and try to acquire the critical section.
// Without the fix, this may deadlock.
PyInterpreterState *interp = PyInterpreterState_Get();
_PyEval_StopTheWorld(interp);
Py_BEGIN_CRITICAL_SECTION(test_data.obj);
Py_END_CRITICAL_SECTION();
_PyEval_StartTheWorld(interp);
for (Py_ssize_t i = 0; i < STW_NUM_THREADS; i++) {
PyThread_join_thread(handles[i]);
}
#undef STW_NUM_THREADS
Py_DECREF(test_data.obj);
Py_RETURN_NONE;
}
#endif // Py_CAN_START_THREADS
static PyMethodDef test_methods[] = {
{"test_critical_sections", test_critical_sections, METH_NOARGS},
{"test_critical_sections_nest", test_critical_sections_nest, METH_NOARGS},
@@ -392,6 +475,7 @@ static PyMethodDef test_methods[] = {
#ifdef Py_CAN_START_THREADS
{"test_critical_sections_threads", test_critical_sections_threads, METH_NOARGS},
{"test_critical_sections_gc", test_critical_sections_gc, METH_NOARGS},
{"test_critical_sections_stw", test_critical_sections_stw, METH_NOARGS},
#endif
{NULL, NULL} /* sentinel */
};
+17 -1
View File
@@ -1,7 +1,8 @@
#include "Python.h"
#include "pycore_lock.h"
#include "pycore_critical_section.h"
#include "pycore_interp.h"
#include "pycore_lock.h"
#ifdef Py_GIL_DISABLED
static_assert(_Alignof(PyCriticalSection) >= 4,
@@ -43,6 +44,15 @@ _PyCriticalSection_BeginSlow(PyCriticalSection *c, PyMutex *m)
}
}
}
// If the world is stopped, we don't need to acquire the lock because
// there are no other threads that could be accessing the object.
// Without this check, acquiring a critical section while the world is
// stopped could lead to a deadlock.
if (tstate->interp->stoptheworld.world_stopped) {
c->_cs_mutex = NULL;
c->_cs_prev = 0;
return;
}
c->_cs_mutex = NULL;
c->_cs_prev = (uintptr_t)tstate->critical_section;
tstate->critical_section = (uintptr_t)c;
@@ -58,6 +68,12 @@ _PyCriticalSection2_BeginSlow(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2,
{
#ifdef Py_GIL_DISABLED
PyThreadState *tstate = _PyThreadState_GET();
if (tstate->interp->stoptheworld.world_stopped) {
c->_cs_base._cs_mutex = NULL;
c->_cs_mutex2 = NULL;
c->_cs_base._cs_prev = 0;
return;
}
c->_cs_base._cs_mutex = NULL;
c->_cs_mutex2 = NULL;
c->_cs_base._cs_prev = tstate->critical_section;