gh-131798: Split _CHECK_AND_ALLOCATE_OBJECT into smaller uops (GH-148433)

Co-authored-by: Hai Zhu <haiizhu@outlook.com>
Co-authored-by: Ken Jin <kenjin4096@gmail.com>
This commit is contained in:
Sacul
2026-04-13 02:31:24 +08:00
committed by GitHub
parent 6f7bb297db
commit 18d7d90ef9
10 changed files with 1413 additions and 1289 deletions
+1 -1
View File
@@ -1371,7 +1371,7 @@ _PyOpcode_macro_expansion[256] = {
[BUILD_STRING] = { .nuops = 1, .uops = { { _BUILD_STRING, OPARG_SIMPLE, 0 } } },
[BUILD_TEMPLATE] = { .nuops = 1, .uops = { { _BUILD_TEMPLATE, OPARG_SIMPLE, 0 } } },
[BUILD_TUPLE] = { .nuops = 1, .uops = { { _BUILD_TUPLE, OPARG_SIMPLE, 0 } } },
[CALL_ALLOC_AND_ENTER_INIT] = { .nuops = 5, .uops = { { _RECORD_CALLABLE, OPARG_SIMPLE, 0 }, { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_AND_ALLOCATE_OBJECT, 2, 1 }, { _CREATE_INIT_FRAME, OPARG_SIMPLE, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } },
[CALL_ALLOC_AND_ENTER_INIT] = { .nuops = 6, .uops = { { _RECORD_CALLABLE, OPARG_SIMPLE, 0 }, { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_OBJECT, 2, 1 }, { _ALLOCATE_OBJECT, OPARG_SIMPLE, 3 }, { _CREATE_INIT_FRAME, OPARG_SIMPLE, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } },
[CALL_BOUND_METHOD_EXACT_ARGS] = { .nuops = 11, .uops = { { _RECORD_BOUND_METHOD, OPARG_SIMPLE, 0 }, { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_CALL_BOUND_METHOD_EXACT_ARGS, OPARG_SIMPLE, 1 }, { _INIT_CALL_BOUND_METHOD_EXACT_ARGS, OPARG_SIMPLE, 1 }, { _CHECK_FUNCTION_VERSION, 2, 1 }, { _CHECK_FUNCTION_EXACT_ARGS, OPARG_SIMPLE, 3 }, { _CHECK_STACK_SPACE, OPARG_SIMPLE, 3 }, { _CHECK_RECURSION_REMAINING, OPARG_SIMPLE, 3 }, { _INIT_CALL_PY_EXACT_ARGS, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } },
[CALL_BOUND_METHOD_GENERAL] = { .nuops = 8, .uops = { { _RECORD_BOUND_METHOD, OPARG_SIMPLE, 0 }, { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_METHOD_VERSION, 2, 1 }, { _EXPAND_METHOD, OPARG_SIMPLE, 3 }, { _CHECK_RECURSION_REMAINING, OPARG_SIMPLE, 3 }, { _PY_FRAME_GENERAL, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } },
[CALL_BUILTIN_CLASS] = { .nuops = 6, .uops = { { _RECORD_CALLABLE, OPARG_SIMPLE, 0 }, { _GUARD_CALLABLE_BUILTIN_CLASS, OPARG_SIMPLE, 3 }, { _CALL_BUILTIN_CLASS, OPARG_SIMPLE, 3 }, { _POP_TOP_OPARG, OPARG_SIMPLE, 3 }, { _POP_TOP, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC_AT_END, OPARG_REPLACED, 3 } } },
+1257 -1255
View File
File diff suppressed because it is too large Load Diff
+22 -7
View File
@@ -303,7 +303,8 @@ const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_CALL_STR_1] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG,
[_GUARD_CALLABLE_TUPLE_1] = HAS_EXIT_FLAG,
[_CALL_TUPLE_1] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG,
[_CHECK_AND_ALLOCATE_OBJECT] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG,
[_CHECK_OBJECT] = HAS_ARG_FLAG | HAS_EXIT_FLAG,
[_ALLOCATE_OBJECT] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG,
[_CREATE_INIT_FRAME] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_SYNC_SP_FLAG,
[_EXIT_INIT_CHECK] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG,
[_GUARD_CALLABLE_BUILTIN_CLASS] = HAS_ARG_FLAG | HAS_EXIT_FLAG,
@@ -2838,10 +2839,19 @@ const _PyUopCachingInfo _PyUop_Caching[MAX_UOP_ID+1] = {
{ 2, 3, _CALL_TUPLE_1_r32 },
},
},
[_CHECK_AND_ALLOCATE_OBJECT] = {
[_CHECK_OBJECT] = {
.best = { 0, 0, 0, 0 },
.entries = {
{ 0, 0, _CHECK_AND_ALLOCATE_OBJECT_r00 },
{ 0, 0, _CHECK_OBJECT_r00 },
{ -1, -1, -1 },
{ -1, -1, -1 },
{ -1, -1, -1 },
},
},
[_ALLOCATE_OBJECT] = {
.best = { 0, 0, 0, 0 },
.entries = {
{ 0, 0, _ALLOCATE_OBJECT_r00 },
{ -1, -1, -1 },
{ -1, -1, -1 },
{ -1, -1, -1 },
@@ -4344,7 +4354,8 @@ const uint16_t _PyUop_Uncached[MAX_UOP_REGS_ID+1] = {
[_GUARD_CALLABLE_TUPLE_1_r23] = _GUARD_CALLABLE_TUPLE_1,
[_GUARD_CALLABLE_TUPLE_1_r33] = _GUARD_CALLABLE_TUPLE_1,
[_CALL_TUPLE_1_r32] = _CALL_TUPLE_1,
[_CHECK_AND_ALLOCATE_OBJECT_r00] = _CHECK_AND_ALLOCATE_OBJECT,
[_CHECK_OBJECT_r00] = _CHECK_OBJECT,
[_ALLOCATE_OBJECT_r00] = _ALLOCATE_OBJECT,
[_CREATE_INIT_FRAME_r01] = _CREATE_INIT_FRAME,
[_EXIT_INIT_CHECK_r10] = _EXIT_INIT_CHECK,
[_GUARD_CALLABLE_BUILTIN_CLASS_r00] = _GUARD_CALLABLE_BUILTIN_CLASS,
@@ -4612,6 +4623,8 @@ const uint16_t _PyUop_SpillsAndReloads[4][4] = {
};
const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] = {
[_ALLOCATE_OBJECT] = "_ALLOCATE_OBJECT",
[_ALLOCATE_OBJECT_r00] = "_ALLOCATE_OBJECT_r00",
[_BINARY_OP] = "_BINARY_OP",
[_BINARY_OP_r23] = "_BINARY_OP_r23",
[_BINARY_OP_ADD_FLOAT] = "_BINARY_OP_ADD_FLOAT",
@@ -4787,8 +4800,6 @@ const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] = {
[_CALL_TYPE_1_r12] = "_CALL_TYPE_1_r12",
[_CALL_TYPE_1_r22] = "_CALL_TYPE_1_r22",
[_CALL_TYPE_1_r32] = "_CALL_TYPE_1_r32",
[_CHECK_AND_ALLOCATE_OBJECT] = "_CHECK_AND_ALLOCATE_OBJECT",
[_CHECK_AND_ALLOCATE_OBJECT_r00] = "_CHECK_AND_ALLOCATE_OBJECT_r00",
[_CHECK_ATTR_CLASS] = "_CHECK_ATTR_CLASS",
[_CHECK_ATTR_CLASS_r01] = "_CHECK_ATTR_CLASS_r01",
[_CHECK_ATTR_CLASS_r11] = "_CHECK_ATTR_CLASS_r11",
@@ -4839,6 +4850,8 @@ const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] = {
[_CHECK_METHOD_VERSION_r00] = "_CHECK_METHOD_VERSION_r00",
[_CHECK_METHOD_VERSION_KW] = "_CHECK_METHOD_VERSION_KW",
[_CHECK_METHOD_VERSION_KW_r11] = "_CHECK_METHOD_VERSION_KW_r11",
[_CHECK_OBJECT] = "_CHECK_OBJECT",
[_CHECK_OBJECT_r00] = "_CHECK_OBJECT_r00",
[_CHECK_PEP_523] = "_CHECK_PEP_523",
[_CHECK_PEP_523_r00] = "_CHECK_PEP_523_r00",
[_CHECK_PEP_523_r11] = "_CHECK_PEP_523_r11",
@@ -6399,7 +6412,9 @@ int _PyUop_num_popped(int opcode, int oparg)
return 0;
case _CALL_TUPLE_1:
return 3;
case _CHECK_AND_ALLOCATE_OBJECT:
case _CHECK_OBJECT:
return 0;
case _ALLOCATE_OBJECT:
return 0;
case _CREATE_INIT_FRAME:
return 2 + oparg;
+40
View File
@@ -1590,6 +1590,42 @@ class TestUopsOptimization(unittest.TestCase):
# __init__ resolution allows promotion of range to constant
self.assertNotIn("_LOAD_GLOBAL_BUILTINS", uops)
def test_init_guards_removed(self):
# Instantiating the same class (held in a local) twice per loop iteration
# should leave only one _CHECK_OBJECT guard in the optimized trace.
class MyPoint:
def __init__(self, x, y):
return None
def testfunc(n):
point_local = MyPoint
for _ in range(n):
p = point_local(1.0, 2.0)
p = point_local(1.0, 2.0)
_, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
self.assertIsNotNone(ex)
# Both call sites should go through the specialized __init__ path,
# giving one _CREATE_INIT_FRAME per call
count = count_ops(ex, "_CREATE_INIT_FRAME")
self.assertEqual(count, 2)
# The first _CHECK_OBJECT guard pins down the type, so the guard for
# the second call is redundant and should have been removed
count = count_ops(ex, "_CHECK_OBJECT")
self.assertEqual(count, 1)
def test_init_guards_removed_global(self):
# Same scenario as the local-class test, but the class is looked up as a
# module global (MyGlobalPoint) on every call.
def testfunc(n):
for _ in range(n):
p = MyGlobalPoint(1.0, 2.0)
p = MyGlobalPoint(1.0, 2.0)
_, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
self.assertIsNotNone(ex)
# Both call sites should go through the specialized __init__ path,
# giving one _CREATE_INIT_FRAME per call
count = count_ops(ex, "_CREATE_INIT_FRAME")
self.assertEqual(count, 2)
# The global class is expected to already be a known constant to the
# optimizer, so neither call should keep its _CHECK_OBJECT guard
count = count_ops(ex, "_CHECK_OBJECT")
self.assertEqual(count, 0)
def test_guard_type_version_locked_propagates(self):
"""
_GUARD_TYPE_VERSION_LOCKED should set the type version on the
@@ -5251,5 +5287,9 @@ class TestObject:
test_object = TestObject()
test_bound_method = TestObject.test.__get__(test_object)
# Module-level class instantiated by test_init_guards_removed_global; its
# __init__ takes two arguments, stores nothing and returns None.
class MyGlobalPoint:
def __init__(self, x, y):
return None
if __name__ == "__main__":
unittest.main()
+8 -1
View File
@@ -1897,7 +1897,7 @@
JUMP_TO_PREDICTED(CALL);
}
}
// _CHECK_AND_ALLOCATE_OBJECT
// _CHECK_OBJECT
{
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
@@ -1919,6 +1919,13 @@
assert(_PyOpcode_Deopt[opcode] == (CALL));
JUMP_TO_PREDICTED(CALL);
}
}
// _ALLOCATE_OBJECT
{
PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable);
assert(PyStackRef_IsNull(self_or_null));
assert(PyType_Check(callable_o));
PyTypeObject *tp = (PyTypeObject *)callable_o;
assert(tp->tp_new == PyBaseObject_Type.tp_new);
assert(tp->tp_flags & Py_TPFLAGS_HEAPTYPE);
assert(tp->tp_alloc == PyType_GenericAlloc);
+11 -2
View File
@@ -4499,15 +4499,23 @@ dummy_func(
POP_TOP +
_CHECK_PERIODIC_AT_END;
op(_CHECK_AND_ALLOCATE_OBJECT, (type_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) {
// Guard half of the former _CHECK_AND_ALLOCATE_OBJECT. Leaves the stack
// unchanged and exits unless self_or_null is NULL and callable is a type
// object whose version tag equals the cached type_version.
op(_CHECK_OBJECT, (type_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) {
PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable);
EXIT_IF(!PyStackRef_IsNull(self_or_null));
EXIT_IF(!PyType_Check(callable_o));
PyTypeObject *tp = (PyTypeObject *)callable_o;
// Compare the type's current version tag against the cached one.
EXIT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version);
}
op(_ALLOCATE_OBJECT, (callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) {
PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable);
assert(PyStackRef_IsNull(self_or_null));
assert(PyType_Check(callable_o));
PyTypeObject *tp = (PyTypeObject *)callable_o;
assert(tp->tp_new == PyBaseObject_Type.tp_new);
assert(tp->tp_flags & Py_TPFLAGS_HEAPTYPE);
assert(tp->tp_alloc == PyType_GenericAlloc);
PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o;
PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init);
PyCodeObject *code = (PyCodeObject *)init_func->func_code;
@@ -4552,7 +4560,8 @@ dummy_func(
_RECORD_CALLABLE +
unused/1 +
_CHECK_PEP_523 +
_CHECK_AND_ALLOCATE_OBJECT +
_CHECK_OBJECT +
_ALLOCATE_OBJECT +
_CREATE_INIT_FRAME +
_PUSH_FRAME;
+18 -1
View File
@@ -16298,7 +16298,7 @@
break;
}
case _CHECK_AND_ALLOCATE_OBJECT_r00: {
case _CHECK_OBJECT_r00: {
CHECK_CURRENT_CACHED_VALUES(0);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef self_or_null;
@@ -16324,6 +16324,23 @@
SET_CURRENT_CACHED_VALUES(0);
JUMP_TO_JUMP_TARGET();
}
SET_CURRENT_CACHED_VALUES(0);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
case _ALLOCATE_OBJECT_r00: {
CHECK_CURRENT_CACHED_VALUES(0);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef self_or_null;
_PyStackRef callable;
oparg = CURRENT_OPARG();
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable);
assert(PyStackRef_IsNull(self_or_null));
assert(PyType_Check(callable_o));
PyTypeObject *tp = (PyTypeObject *)callable_o;
assert(tp->tp_new == PyBaseObject_Type.tp_new);
assert(tp->tp_flags & Py_TPFLAGS_HEAPTYPE);
assert(tp->tp_alloc == PyType_GenericAlloc);
+8 -1
View File
@@ -1897,7 +1897,7 @@
JUMP_TO_PREDICTED(CALL);
}
}
// _CHECK_AND_ALLOCATE_OBJECT
// _CHECK_OBJECT
{
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
@@ -1919,6 +1919,13 @@
assert(_PyOpcode_Deopt[opcode] == (CALL));
JUMP_TO_PREDICTED(CALL);
}
}
// _ALLOCATE_OBJECT
{
PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable);
assert(PyStackRef_IsNull(self_or_null));
assert(PyType_Check(callable_o));
PyTypeObject *tp = (PyTypeObject *)callable_o;
assert(tp->tp_new == PyBaseObject_Type.tp_new);
assert(tp->tp_flags & Py_TPFLAGS_HEAPTYPE);
assert(tp->tp_alloc == PyType_GenericAlloc);
+23 -12
View File
@@ -1075,28 +1075,39 @@ dummy_func(void) {
ex_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, func_st, NULL, 0));
}
op(_CHECK_AND_ALLOCATE_OBJECT, (type_version/2, callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) {
(void)args;
op(_CHECK_OBJECT, (type_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) {
PyObject *probable_callable = sym_get_probable_value(callable);
assert(probable_callable != NULL);
assert(PyType_Check(probable_callable));
PyTypeObject *tp = (PyTypeObject *)probable_callable;
if (tp->tp_version_tag == type_version) {
// If the type version has not changed since we last saw it,
// then we know this __init__ is definitely the same one as in the cache.
// We can promote callable to a known constant. This does not need a
// type watcher, as we do not remove this _CHECK_AND_ALLOCATE_OBJECT guard.
// TODO: split up _CHECK_AND_ALLOCATE_OBJECT to the check then alloate, so we can
// eliminate the check.
PyHeapTypeObject *cls = (PyHeapTypeObject *)probable_callable;
PyObject *const_callable = sym_get_const(ctx, callable);
bool is_probable = const_callable == NULL && probable_callable != NULL;
PyObject *callable_o = const_callable != NULL ? const_callable : probable_callable;
if (sym_is_null(self_or_null) &&
callable_o != NULL &&
PyType_Check(callable_o) &&
((PyTypeObject *)callable_o)->tp_version_tag == type_version) {
// Probable types need the guard.
if (!is_probable) {
ADD_OP(_NOP, 0, 0);
}
else {
// Promote the probable type, as we have
// guarded on it.
sym_set_const(callable, callable_o);
}
PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o;
PyObject *init = cls->_spec_cache.init;
assert(init != NULL);
assert(PyFunction_Check(init));
callable = sym_new_const(ctx, init);
PyType_Watch(TYPE_WATCHER_ID, callable_o);
_Py_BloomFilter_Add(dependencies, callable_o);;
}
else {
callable = sym_new_not_null(ctx);
}
}
// Optimizer-side model of _ALLOCATE_OBJECT: the callable slot is left as
// it was, and the self_or_null slot is replaced by a fresh not-null symbol.
op(_ALLOCATE_OBJECT, (callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) {
self_or_null = sym_new_not_null(ctx);
}
+25 -9
View File
@@ -3917,31 +3917,47 @@
break;
}
case _CHECK_AND_ALLOCATE_OBJECT: {
JitOptRef *args;
case _CHECK_OBJECT: {
JitOptRef self_or_null;
JitOptRef callable;
args = &stack_pointer[-oparg];
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
uint32_t type_version = (uint32_t)this_instr->operand0;
(void)args;
PyObject *probable_callable = sym_get_probable_value(callable);
assert(probable_callable != NULL);
assert(PyType_Check(probable_callable));
PyTypeObject *tp = (PyTypeObject *)probable_callable;
if (tp->tp_version_tag == type_version) {
PyHeapTypeObject *cls = (PyHeapTypeObject *)probable_callable;
PyObject *const_callable = sym_get_const(ctx, callable);
bool is_probable = const_callable == NULL && probable_callable != NULL;
PyObject *callable_o = const_callable != NULL ? const_callable : probable_callable;
if (sym_is_null(self_or_null) &&
callable_o != NULL &&
PyType_Check(callable_o) &&
((PyTypeObject *)callable_o)->tp_version_tag == type_version) {
if (!is_probable) {
ADD_OP(_NOP, 0, 0);
}
else {
sym_set_const(callable, callable_o);
}
PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o;
PyObject *init = cls->_spec_cache.init;
assert(init != NULL);
assert(PyFunction_Check(init));
callable = sym_new_const(ctx, init);
stack_pointer[-2 - oparg] = callable;
PyType_Watch(TYPE_WATCHER_ID, callable_o);
_Py_BloomFilter_Add(dependencies, callable_o);;
}
else {
callable = sym_new_not_null(ctx);
}
self_or_null = sym_new_not_null(ctx);
stack_pointer[-2 - oparg] = callable;
break;
}
case _ALLOCATE_OBJECT: {
JitOptRef self_or_null;
self_or_null = stack_pointer[-1 - oparg];
self_or_null = sym_new_not_null(ctx);
stack_pointer[-1 - oparg] = self_or_null;
break;
}