mirror of
https://github.com/python/cpython.git
synced 2026-05-06 12:49:07 -04:00
[3.14] gh-144513: Skip critical section locking during stop-the-world (gh-144524) (#145570)
This commit is contained in:
@@ -0,0 +1,2 @@
|
||||
Fix potential deadlock when using critical sections during stop-the-world
|
||||
pauses in the free-threaded build.
|
||||
@@ -4,6 +4,8 @@
|
||||
|
||||
#include "parts.h"
|
||||
#include "pycore_critical_section.h"
|
||||
#include "pycore_pystate.h"
|
||||
#include "pycore_pythread.h"
|
||||
|
||||
#ifdef MS_WINDOWS
|
||||
# include <windows.h> // Sleep()
|
||||
@@ -381,6 +383,87 @@ test_critical_section2_reacquisition(PyObject *self, PyObject *Py_UNUSED(args))
|
||||
|
||||
#endif // Py_GIL_DISABLED
|
||||
|
||||
#ifdef Py_CAN_START_THREADS
|
||||
|
||||
// gh-144513: Test that critical sections don't deadlock with stop-the-world.
|
||||
// This test is designed to deadlock (timeout) on builds without the fix.
|
||||
struct test_data_stw {
|
||||
PyObject *obj;
|
||||
Py_ssize_t num_threads;
|
||||
Py_ssize_t started;
|
||||
PyEvent ready;
|
||||
};
|
||||
|
||||
static void
|
||||
thread_stw(void *arg)
|
||||
{
|
||||
struct test_data_stw *test_data = arg;
|
||||
PyGILState_STATE gil = PyGILState_Ensure();
|
||||
|
||||
if (_Py_atomic_add_ssize(&test_data->started, 1) == test_data->num_threads - 1) {
|
||||
_PyEvent_Notify(&test_data->ready);
|
||||
}
|
||||
|
||||
// All threads: acquire critical section and hold it long enough to
|
||||
// trigger TIME_TO_BE_FAIR_NS (1 ms), which causes direct handoff on unlock.
|
||||
Py_BEGIN_CRITICAL_SECTION(test_data->obj);
|
||||
pysleep(10); // 10 ms = 10 x TIME_TO_BE_FAIR_NS
|
||||
Py_END_CRITICAL_SECTION();
|
||||
|
||||
PyGILState_Release(gil);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
test_critical_sections_stw(PyObject *self, PyObject *Py_UNUSED(args))
|
||||
{
|
||||
// gh-144513: Test that critical sections don't deadlock during STW.
|
||||
//
|
||||
// The deadlock occurs when lock ownership is handed off (due to fairness
|
||||
// after TIME_TO_BE_FAIR_NS) to a thread that has already suspended for
|
||||
// stop-the-world. The STW requester then cannot acquire the lock.
|
||||
//
|
||||
// With the fix, the STW requester detects world_stopped and skips locking.
|
||||
|
||||
#define STW_NUM_THREADS 2
|
||||
struct test_data_stw test_data = {
|
||||
.obj = PyDict_New(),
|
||||
.num_threads = STW_NUM_THREADS,
|
||||
};
|
||||
if (test_data.obj == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PyThread_handle_t handles[STW_NUM_THREADS];
|
||||
PyThread_ident_t idents[STW_NUM_THREADS];
|
||||
for (Py_ssize_t i = 0; i < STW_NUM_THREADS; i++) {
|
||||
PyThread_start_joinable_thread(&thread_stw, &test_data,
|
||||
&idents[i], &handles[i]);
|
||||
}
|
||||
|
||||
// Wait for threads to start, then let them compete for the lock
|
||||
PyEvent_Wait(&test_data.ready);
|
||||
pysleep(5);
|
||||
|
||||
// Request stop-the-world and try to acquire the critical section.
|
||||
// Without the fix, this may deadlock.
|
||||
PyInterpreterState *interp = PyInterpreterState_Get();
|
||||
_PyEval_StopTheWorld(interp);
|
||||
|
||||
Py_BEGIN_CRITICAL_SECTION(test_data.obj);
|
||||
Py_END_CRITICAL_SECTION();
|
||||
|
||||
_PyEval_StartTheWorld(interp);
|
||||
|
||||
for (Py_ssize_t i = 0; i < STW_NUM_THREADS; i++) {
|
||||
PyThread_join_thread(handles[i]);
|
||||
}
|
||||
#undef STW_NUM_THREADS
|
||||
Py_DECREF(test_data.obj);
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
#endif // Py_CAN_START_THREADS
|
||||
|
||||
static PyMethodDef test_methods[] = {
|
||||
{"test_critical_sections", test_critical_sections, METH_NOARGS},
|
||||
{"test_critical_sections_nest", test_critical_sections_nest, METH_NOARGS},
|
||||
@@ -392,6 +475,7 @@ static PyMethodDef test_methods[] = {
|
||||
#ifdef Py_CAN_START_THREADS
|
||||
{"test_critical_sections_threads", test_critical_sections_threads, METH_NOARGS},
|
||||
{"test_critical_sections_gc", test_critical_sections_gc, METH_NOARGS},
|
||||
{"test_critical_sections_stw", test_critical_sections_stw, METH_NOARGS},
|
||||
#endif
|
||||
{NULL, NULL} /* sentinel */
|
||||
};
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
#include "Python.h"
|
||||
|
||||
#include "pycore_lock.h"
|
||||
#include "pycore_critical_section.h"
|
||||
#include "pycore_interp.h"
|
||||
#include "pycore_lock.h"
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
static_assert(_Alignof(PyCriticalSection) >= 4,
|
||||
@@ -43,6 +44,15 @@ _PyCriticalSection_BeginSlow(PyCriticalSection *c, PyMutex *m)
|
||||
}
|
||||
}
|
||||
}
|
||||
// If the world is stopped, we don't need to acquire the lock because
|
||||
// there are no other threads that could be accessing the object.
|
||||
// Without this check, acquiring a critical section while the world is
|
||||
// stopped could lead to a deadlock.
|
||||
if (tstate->interp->stoptheworld.world_stopped) {
|
||||
c->_cs_mutex = NULL;
|
||||
c->_cs_prev = 0;
|
||||
return;
|
||||
}
|
||||
c->_cs_mutex = NULL;
|
||||
c->_cs_prev = (uintptr_t)tstate->critical_section;
|
||||
tstate->critical_section = (uintptr_t)c;
|
||||
@@ -58,6 +68,12 @@ _PyCriticalSection2_BeginSlow(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2,
|
||||
{
|
||||
#ifdef Py_GIL_DISABLED
|
||||
PyThreadState *tstate = _PyThreadState_GET();
|
||||
if (tstate->interp->stoptheworld.world_stopped) {
|
||||
c->_cs_base._cs_mutex = NULL;
|
||||
c->_cs_mutex2 = NULL;
|
||||
c->_cs_base._cs_prev = 0;
|
||||
return;
|
||||
}
|
||||
c->_cs_base._cs_mutex = NULL;
|
||||
c->_cs_mutex2 = NULL;
|
||||
c->_cs_base._cs_prev = tstate->critical_section;
|
||||
|
||||
Reference in New Issue
Block a user