GH-126910: Make _Py_get_machine_stack_pointer return the stack pointer (#147945)

* Make _Py_get_machine_stack_pointer return the stack pointer (or close to it), not the frame pointer

* Make ``_Py_ReachedRecursionLimit`` inline again
* Remove ``_Py_MakeRecCheck``, replacing its use with ``_Py_ReachedRecursionLimit``
* Move stack switching check into ``_Py_CheckRecursiveCall``
This commit is contained in:
Mark Shannon
2026-04-01 17:15:13 +01:00
committed by GitHub
parent 9e5b838372
commit 255026d9ee
7 changed files with 37 additions and 55 deletions
+7 -9
View File
@@ -211,16 +211,16 @@ extern void _PyEval_DeactivateOpCache(void);
/* --- _Py_EnterRecursiveCall() ----------------------------------------- */
static inline int _Py_MakeRecCheck(PyThreadState *tstate) {
static inline int _Py_ReachedRecursionLimit(PyThreadState *tstate) {
uintptr_t here_addr = _Py_get_machine_stack_pointer();
_PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
// Overflow if stack pointer is between soft limit and the base of the hardware stack.
// If it is below the hardware stack base, assume that we have the wrong stack limits, and do nothing.
// We could have the wrong stack limits because of limited platform support, or user-space threads.
// Possible overflow if stack pointer is beyond the soft limit.
// _Py_CheckRecursiveCall will check for corner cases and
// report an error if there is an overflow.
#if _Py_STACK_GROWS_DOWN
return here_addr < _tstate->c_stack_soft_limit && here_addr >= _tstate->c_stack_soft_limit - 2 * _PyOS_STACK_MARGIN_BYTES;
return here_addr < _tstate->c_stack_soft_limit;
#else
return here_addr > _tstate->c_stack_soft_limit && here_addr <= _tstate->c_stack_soft_limit + 2 * _PyOS_STACK_MARGIN_BYTES;
return here_addr > _tstate->c_stack_soft_limit;
#endif
}
@@ -235,7 +235,7 @@ PyAPI_FUNC(int) _Py_CheckRecursiveCallPy(
static inline int _Py_EnterRecursiveCallTstate(PyThreadState *tstate,
const char *where) {
return (_Py_MakeRecCheck(tstate) && _Py_CheckRecursiveCall(tstate, where));
return (_Py_ReachedRecursionLimit(tstate) && _Py_CheckRecursiveCall(tstate, where));
}
static inline int _Py_EnterRecursiveCall(const char *where) {
@@ -249,8 +249,6 @@ static inline void _Py_LeaveRecursiveCallTstate(PyThreadState *tstate) {
PyAPI_FUNC(void) _Py_InitializeRecursionLimits(PyThreadState *tstate);
PyAPI_FUNC(int) _Py_ReachedRecursionLimit(PyThreadState *tstate);
// Export for test_peg_generator
PyAPI_FUNC(int) _Py_ReachedRecursionLimitWithMargin(
PyThreadState *tstate,
+11 -11
View File
@@ -306,23 +306,23 @@ _Py_AssertHoldsTstateFunc(const char *func)
#define _Py_AssertHoldsTstate()
#endif
#if !_Py__has_builtin(__builtin_frame_address) && !defined(__GNUC__) && !defined(_MSC_VER)
static uintptr_t return_pointer_as_int(char* p) {
return (uintptr_t)p;
}
#endif
/* Return the current machine stack pointer (or an address close to it)
 * as an integer.
 *
 * The value is read from the stack-pointer register where the toolchain
 * gives us a way to do so; otherwise the address of a local variable is
 * used as an approximation.
 */
static inline uintptr_t
_Py_get_machine_stack_pointer(void) {
    uintptr_t result;
#if defined(_MSC_VER) && defined(_M_ARM64)
    /* __getReg() is an MSVC intrinsic, so it must be guarded by _MSC_VER.
     * Register 31 is read here as the stack pointer. */
    result = __getReg(31);
#elif defined(_MSC_VER) && defined(_M_X64)
    /* MSVC x64 has no inline asm; the slot holding the return address is
     * the closest available approximation. */
    result = (uintptr_t)_AddressOfReturnAddress();
#elif defined(__aarch64__)
    __asm__ ("mov %0, sp" : "=r" (result));
#elif defined(__x86_64__)
    /* {att|intel} dialect alternatives keep this valid under -masm=intel.
     * The braces must be balanced or the template is rejected. */
    __asm__ ("{movq %%rsp, %0|mov %0, rsp}" : "=r" (result));
#else
    /* Portable fallback: the address of a local is close to the stack
     * pointer. */
    char here;
    result = (uintptr_t)&here;
#endif
    return result;
}
static inline intptr_t
+2 -1
View File
@@ -46,7 +46,8 @@ extern PyObject * _Py_CompileStringObjectWithModule(
* stack consumption of PyEval_EvalDefault */
#if (defined(Py_DEBUG) \
|| defined(_Py_ADDRESS_SANITIZER) \
|| defined(_Py_THREAD_SANITIZER))
|| defined(_Py_THREAD_SANITIZER)) \
|| defined(_Py_UNDEFINED_BEHAVIOR_SANITIZER)
# define _PyOS_LOG2_STACK_MARGIN 12
#else
# define _PyOS_LOG2_STACK_MARGIN 11
+5
View File
@@ -598,6 +598,11 @@ extern "C" {
# define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
# endif
# endif
# if __has_feature(undefined_behavior_sanitizer)
# if !defined(_Py_UNDEFINED_BEHAVIOR_SANITIZER)
# define _Py_UNDEFINED_BEHAVIOR_SANITIZER
# endif
# endif
#elif defined(__GNUC__)
# if defined(__SANITIZE_ADDRESS__)
# define _Py_ADDRESS_SANITIZER
+1 -1
View File
@@ -707,7 +707,7 @@ class ElementDeclHandlerTest(unittest.TestCase):
def test_deeply_nested_content_model(self):
# This should raise a RecursionError and not crash.
# See https://github.com/python/cpython/issues/145986.
N = 500_000
N = 800_000
data = (
b'<!DOCTYPE root [\n<!ELEMENT root '
+ b'(a, ' * N + b'a' + b')' * N
+10 -32
View File
@@ -49,20 +49,6 @@ _Py_ReachedRecursionLimitWithMargin(PyThreadState *tstate, int margin_count)
#endif
}
void
_Py_EnterRecursiveCallUnchecked(PyThreadState *tstate)
{
uintptr_t here_addr = _Py_get_machine_stack_pointer();
_PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
#if _Py_STACK_GROWS_DOWN
if (here_addr < _tstate->c_stack_hard_limit) {
#else
if (here_addr > _tstate->c_stack_hard_limit) {
#endif
Py_FatalError("Unchecked stack overflow.");
}
}
#if defined(__s390x__)
# define Py_C_STACK_SIZE 320000
#elif defined(_WIN32)
@@ -278,7 +264,7 @@ PyUnstable_ThreadState_ResetStackProtection(PyThreadState *tstate)
/* The function _Py_EnterRecursiveCallTstate() only calls _Py_CheckRecursiveCall()
if the stack pointer is between the stack base and c_stack_hard_limit. */
if the stack pointer is beyond c_stack_soft_limit. */
int
_Py_CheckRecursiveCall(PyThreadState *tstate, const char *where)
{
@@ -287,16 +273,21 @@ _Py_CheckRecursiveCall(PyThreadState *tstate, const char *where)
assert(_tstate->c_stack_soft_limit != 0);
assert(_tstate->c_stack_hard_limit != 0);
#if _Py_STACK_GROWS_DOWN
assert(here_addr >= _tstate->c_stack_hard_limit - _PyOS_STACK_MARGIN_BYTES);
if (here_addr < _tstate->c_stack_hard_limit) {
/* Overflowing while handling an overflow. Give up. */
if (here_addr < _tstate->c_stack_hard_limit - _PyOS_STACK_MARGIN_BYTES) {
// Far out of bounds -- Assume stack switching has occurred
return 0;
}
int kbytes_used = (int)(_tstate->c_stack_top - here_addr)/1024;
#else
assert(here_addr <= _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES);
if (here_addr > _tstate->c_stack_hard_limit) {
/* Overflowing while handling an overflow. Give up. */
if (here_addr > _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES) {
// Far out of bounds -- Assume stack switching has occurred
return 0;
}
int kbytes_used = (int)(here_addr - _tstate->c_stack_top)/1024;
#endif
/* Too much stack used to safely raise an exception. Give up. */
char buffer[80];
snprintf(buffer, 80, "Unrecoverable stack overflow (used %d kB)%s", kbytes_used, where);
Py_FatalError(buffer);
@@ -1201,19 +1192,6 @@ _PyEval_GetIter(_PyStackRef iterable, _PyStackRef *index_or_null, int yield_from
return PyStackRef_FromPyObjectSteal(iter_o);
}
Py_NO_INLINE int
_Py_ReachedRecursionLimit(PyThreadState *tstate) {
uintptr_t here_addr = _Py_get_machine_stack_pointer();
_PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
assert(_tstate->c_stack_hard_limit != 0);
#if _Py_STACK_GROWS_DOWN
return here_addr <= _tstate->c_stack_soft_limit;
#else
return here_addr >= _tstate->c_stack_soft_limit;
#endif
}
#if (defined(__GNUC__) && __GNUC__ >= 10 && !defined(__clang__)) && defined(__x86_64__)
/*
* gh-129987: The SLP autovectorizer can cause poor code generation for
+1 -1
View File
@@ -734,7 +734,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz
return 0;
}
/* One-off compilation of the jit entry shim
/* One-off compilation of the jit entry shim.
* We compile this once only as it is effectively a normal
* function, but we need to use the JIT because it needs
* to understand the jit-specific calling convention.