gh-146393: Optimize float division operations by mutating uniquely-referenced operands in place (JIT only) (GH-146397)

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Pieter Eendebak
2026-04-14 20:08:04 +02:00
committed by GitHub
parent bdb0b36192
commit 95cbd4a232
13 changed files with 1940 additions and 1263 deletions
+2 -2
View File
@@ -1092,7 +1092,7 @@ struct opcode_metadata {
PyAPI_DATA(const struct opcode_metadata) _PyOpcode_opcode_metadata[267];
#ifdef NEED_OPCODE_METADATA
const struct opcode_metadata _PyOpcode_opcode_metadata[267] = {
[BINARY_OP] = { true, INSTR_FMT_IBC0000, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
[BINARY_OP] = { true, INSTR_FMT_IBC0000, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_RECORDS_VALUE_FLAG },
[BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG },
[BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG },
[BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG },
@@ -1345,7 +1345,7 @@ extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256];
#ifdef NEED_OPCODE_METADATA
const struct opcode_macro_expansion
_PyOpcode_macro_expansion[256] = {
[BINARY_OP] = { .nuops = 3, .uops = { { _BINARY_OP, OPARG_SIMPLE, 4 }, { _POP_TOP, OPARG_SIMPLE, 4 }, { _POP_TOP, OPARG_SIMPLE, 4 } } },
[BINARY_OP] = { .nuops = 5, .uops = { { _RECORD_TOS, OPARG_SIMPLE, 0 }, { _RECORD_NOS, OPARG_SIMPLE, 0 }, { _BINARY_OP, OPARG_SIMPLE, 4 }, { _POP_TOP, OPARG_SIMPLE, 4 }, { _POP_TOP, OPARG_SIMPLE, 4 } } },
[BINARY_OP_ADD_FLOAT] = { .nuops = 5, .uops = { { _GUARD_TOS_FLOAT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 } } },
[BINARY_OP_ADD_INT] = { .nuops = 5, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_INT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 } } },
[BINARY_OP_ADD_UNICODE] = { .nuops = 5, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_UNICODE, OPARG_SIMPLE, 5 }, { _POP_TOP_UNICODE, OPARG_SIMPLE, 5 }, { _POP_TOP_UNICODE, OPARG_SIMPLE, 5 } } },
+1254 -1244
View File
File diff suppressed because it is too large Load Diff
+53
View File
@@ -125,6 +125,9 @@ const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_BINARY_OP_ADD_FLOAT_INPLACE_RIGHT] = 0,
[_BINARY_OP_MULTIPLY_FLOAT_INPLACE_RIGHT] = 0,
[_BINARY_OP_SUBTRACT_FLOAT_INPLACE_RIGHT] = 0,
[_BINARY_OP_TRUEDIV_FLOAT] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG,
[_BINARY_OP_TRUEDIV_FLOAT_INPLACE] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG,
[_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG,
[_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_PURE_FLAG,
[_BINARY_OP_INPLACE_ADD_UNICODE] = HAS_LOCAL_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_GUARD_BINARY_OP_EXTEND] = HAS_EXIT_FLAG | HAS_ESCAPES_FLAG,
@@ -1239,6 +1242,33 @@ const _PyUopCachingInfo _PyUop_Caching[MAX_UOP_ID+1] = {
{ -1, -1, -1 },
},
},
[_BINARY_OP_TRUEDIV_FLOAT] = {
.best = { 2, 2, 2, 2 },
.entries = {
{ -1, -1, -1 },
{ -1, -1, -1 },
{ 3, 2, _BINARY_OP_TRUEDIV_FLOAT_r23 },
{ -1, -1, -1 },
},
},
[_BINARY_OP_TRUEDIV_FLOAT_INPLACE] = {
.best = { 0, 1, 2, 2 },
.entries = {
{ 3, 0, _BINARY_OP_TRUEDIV_FLOAT_INPLACE_r03 },
{ 3, 1, _BINARY_OP_TRUEDIV_FLOAT_INPLACE_r13 },
{ 3, 2, _BINARY_OP_TRUEDIV_FLOAT_INPLACE_r23 },
{ -1, -1, -1 },
},
},
[_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT] = {
.best = { 0, 1, 2, 2 },
.entries = {
{ 3, 0, _BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT_r03 },
{ 3, 1, _BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT_r13 },
{ 3, 2, _BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT_r23 },
{ -1, -1, -1 },
},
},
[_BINARY_OP_ADD_UNICODE] = {
.best = { 0, 1, 2, 2 },
.entries = {
@@ -4024,6 +4054,13 @@ const uint16_t _PyUop_Uncached[MAX_UOP_REGS_ID+1] = {
[_BINARY_OP_SUBTRACT_FLOAT_INPLACE_RIGHT_r03] = _BINARY_OP_SUBTRACT_FLOAT_INPLACE_RIGHT,
[_BINARY_OP_SUBTRACT_FLOAT_INPLACE_RIGHT_r13] = _BINARY_OP_SUBTRACT_FLOAT_INPLACE_RIGHT,
[_BINARY_OP_SUBTRACT_FLOAT_INPLACE_RIGHT_r23] = _BINARY_OP_SUBTRACT_FLOAT_INPLACE_RIGHT,
[_BINARY_OP_TRUEDIV_FLOAT_r23] = _BINARY_OP_TRUEDIV_FLOAT,
[_BINARY_OP_TRUEDIV_FLOAT_INPLACE_r03] = _BINARY_OP_TRUEDIV_FLOAT_INPLACE,
[_BINARY_OP_TRUEDIV_FLOAT_INPLACE_r13] = _BINARY_OP_TRUEDIV_FLOAT_INPLACE,
[_BINARY_OP_TRUEDIV_FLOAT_INPLACE_r23] = _BINARY_OP_TRUEDIV_FLOAT_INPLACE,
[_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT_r03] = _BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT,
[_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT_r13] = _BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT,
[_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT_r23] = _BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT,
[_BINARY_OP_ADD_UNICODE_r03] = _BINARY_OP_ADD_UNICODE,
[_BINARY_OP_ADD_UNICODE_r13] = _BINARY_OP_ADD_UNICODE,
[_BINARY_OP_ADD_UNICODE_r23] = _BINARY_OP_ADD_UNICODE,
@@ -4745,6 +4782,16 @@ const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] = {
[_BINARY_OP_SUBTRACT_INT_INPLACE_RIGHT_r03] = "_BINARY_OP_SUBTRACT_INT_INPLACE_RIGHT_r03",
[_BINARY_OP_SUBTRACT_INT_INPLACE_RIGHT_r13] = "_BINARY_OP_SUBTRACT_INT_INPLACE_RIGHT_r13",
[_BINARY_OP_SUBTRACT_INT_INPLACE_RIGHT_r23] = "_BINARY_OP_SUBTRACT_INT_INPLACE_RIGHT_r23",
[_BINARY_OP_TRUEDIV_FLOAT] = "_BINARY_OP_TRUEDIV_FLOAT",
[_BINARY_OP_TRUEDIV_FLOAT_r23] = "_BINARY_OP_TRUEDIV_FLOAT_r23",
[_BINARY_OP_TRUEDIV_FLOAT_INPLACE] = "_BINARY_OP_TRUEDIV_FLOAT_INPLACE",
[_BINARY_OP_TRUEDIV_FLOAT_INPLACE_r03] = "_BINARY_OP_TRUEDIV_FLOAT_INPLACE_r03",
[_BINARY_OP_TRUEDIV_FLOAT_INPLACE_r13] = "_BINARY_OP_TRUEDIV_FLOAT_INPLACE_r13",
[_BINARY_OP_TRUEDIV_FLOAT_INPLACE_r23] = "_BINARY_OP_TRUEDIV_FLOAT_INPLACE_r23",
[_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT] = "_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT",
[_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT_r03] = "_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT_r03",
[_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT_r13] = "_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT_r13",
[_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT_r23] = "_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT_r23",
[_BINARY_SLICE] = "_BINARY_SLICE",
[_BINARY_SLICE_r31] = "_BINARY_SLICE_r31",
[_BUILD_INTERPOLATION] = "_BUILD_INTERPOLATION",
@@ -6077,6 +6124,12 @@ int _PyUop_num_popped(int opcode, int oparg)
return 2;
case _BINARY_OP_SUBTRACT_FLOAT_INPLACE_RIGHT:
return 2;
case _BINARY_OP_TRUEDIV_FLOAT:
return 2;
case _BINARY_OP_TRUEDIV_FLOAT_INPLACE:
return 2;
case _BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT:
return 2;
case _BINARY_OP_ADD_UNICODE:
return 2;
case _BINARY_OP_INPLACE_ADD_UNICODE:
+165
View File
@@ -3674,6 +3674,171 @@ class TestUopsOptimization(unittest.TestCase):
uops = get_opnames(ex)
self.assertNotIn("_UNARY_NEGATIVE_FLOAT_INPLACE", uops)
def test_float_truediv_inplace_unique_lhs(self):
# (a + b) / (c + d): LHS is unique float from add, RHS is unique
# float from add. The division reuses the LHS in place.
# Per iteration: (2.0 + 3.0) / (1.0 + 3.0) == 1.25.
def testfunc(args):
a, b, c, d, n = args
total = 0.0
for _ in range(n):
total += (a + b) / (c + d)
return total
res, ex = self._run_with_optimizer(testfunc, (2.0, 3.0, 1.0, 3.0, TIER2_THRESHOLD))
self.assertAlmostEqual(res, TIER2_THRESHOLD * 1.25)
# An executor must have been created for the loop.
self.assertIsNotNone(ex)
uops = get_opnames(ex)
# The optimizer tests the LHS for uniqueness before the RHS, so the
# left-operand inplace variant is the one expected here.
self.assertIn("_BINARY_OP_TRUEDIV_FLOAT_INPLACE", uops)
def test_float_truediv_inplace_unique_rhs(self):
# x = c + d stores to a local (not unique when reloaded).
# (a + b) is unique. The division should use inplace on the RHS.
# Per iteration: x == 4.0 + 5.0 == 9.0 and a + b == 5.0, so the
# quotient added to the total is 9.0 / 5.0.
def testfunc(args):
a, b, c, d, n = args
total = 0.0
for _ in range(n):
x = c + d
total += x / (a + b)
return total
res, ex = self._run_with_optimizer(testfunc, (2.0, 3.0, 4.0, 5.0, TIER2_THRESHOLD))
self.assertAlmostEqual(res, TIER2_THRESHOLD * (9.0 / 5.0))
# An executor must have been created for the loop.
self.assertIsNotNone(ex)
uops = get_opnames(ex)
self.assertIn("_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT", uops)
def test_float_truediv_speculative_guards_from_tracing(self):
# a, b are locals with no statically known type. _RECORD_TOS /
# _RECORD_NOS (added to the BINARY_OP macro) capture the observed
# operand types during tracing, and the optimizer then speculatively
# emits _GUARD_{TOS,NOS}_FLOAT and specializes the division.
def testfunc(args):
a, b, n = args
total = 0.0
for _ in range(n):
total += a / b
return total
res, ex = self._run_with_optimizer(testfunc, (10.0, 3.0, TIER2_THRESHOLD))
self.assertAlmostEqual(res, TIER2_THRESHOLD * (10.0 / 3.0))
self.assertIsNotNone(ex)
uops = get_opnames(ex)
# Both speculative guards must be present in the trace ...
self.assertIn("_GUARD_TOS_FLOAT", uops)
self.assertIn("_GUARD_NOS_FLOAT", uops)
# ... together with the non-inplace division uop (a and b are plain
# locals, so presumably neither operand is marked unique).
self.assertIn("_BINARY_OP_TRUEDIV_FLOAT", uops)
def test_float_remainder_speculative_guards_from_tracing(self):
# a, b are locals with no statically known type. Tracing records
# them as floats; the optimizer then speculatively emits
# _GUARD_{TOS,NOS}_FLOAT for NB_REMAINDER. That narrows both
# operands to float, and the _BINARY_OP handler marks the result
# as a unique float. Downstream, `* 2.0` therefore specializes
# to _BINARY_OP_MULTIPLY_FLOAT_INPLACE.
# Per iteration: (10.0 % 3.0) * 2.0 == 2.0.
def testfunc(args):
a, b, n = args
total = 0.0
for _ in range(n):
total += (a % b) * 2.0
return total
res, ex = self._run_with_optimizer(testfunc, (10.0, 3.0, TIER2_THRESHOLD))
self.assertAlmostEqual(res, TIER2_THRESHOLD * (10.0 % 3.0) * 2.0)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
self.assertIn("_GUARD_TOS_FLOAT", uops)
self.assertIn("_GUARD_NOS_FLOAT", uops)
# The remainder itself is not specialized; only its downstream effect
# (the inplace multiply) is asserted.
self.assertIn("_BINARY_OP_MULTIPLY_FLOAT_INPLACE", uops)
def test_float_truediv_type_propagation(self):
# Check that _BINARY_OP_TRUEDIV_FLOAT propagates type information:
# the subtraction of two division results should itself be
# specialized as an inplace float operation.
def testfunc(args):
a, b, n = args
total = 0.0
for _ in range(n):
x = (a + b) # type of x will specialize to float
total += x / x - x / x
return total
res, ex = self._run_with_optimizer(testfunc,
(2.0, 3.0, TIER2_THRESHOLD))
# x / x - x / x is 0.0 on every iteration, so the expected total is 0.0;
# the numeric result is incidental, the uop selection below is the point.
expected = TIER2_THRESHOLD * ((2.0 + 3.0) / (2.0 + 3.0) - (2.0 + 3.0) / (2.0 + 3.0))
self.assertAlmostEqual(res, expected)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
self.assertIn("_BINARY_OP_TRUEDIV_FLOAT", uops)
self.assertIn("_BINARY_OP_SUBTRACT_FLOAT_INPLACE", uops)
def test_float_truediv_unique_result_enables_inplace(self):
# (a+b) / (c+d) / (e+f): chained divisions where each result
# is unique, enabling inplace for subsequent divisions.
# Per iteration: (2.0 + 3.0) / (1.0 + 1.0) / (1.0 + 1.0) == 1.25.
def testfunc(args):
a, b, c, d, e, f, n = args
total = 0.0
for _ in range(n):
total += (a + b) / (c + d) / (e + f)
return total
res, ex = self._run_with_optimizer(testfunc,
(2.0, 3.0, 1.0, 1.0, 1.0, 1.0, TIER2_THRESHOLD))
expected = TIER2_THRESHOLD * ((2.0 + 3.0) / (1.0 + 1.0) / (1.0 + 1.0))
self.assertAlmostEqual(res, expected)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
self.assertIn("_BINARY_OP_TRUEDIV_FLOAT_INPLACE", uops)
def test_float_add_chain_both_unique(self):
# (a+b) + (c+d): both sub-additions produce unique floats.
# The outer + should use inplace on one of them.
# Per iteration: (1.0 + 2.0) + (3.0 + 4.0) == 10.0.
def testfunc(args):
a, b, c, d, n = args
total = 0.0
for _ in range(n):
total += (a + b) + (c + d)
return total
res, ex = self._run_with_optimizer(testfunc, (1.0, 2.0, 3.0, 4.0, TIER2_THRESHOLD))
self.assertAlmostEqual(res, TIER2_THRESHOLD * 10.0)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
# The outer + should use inplace (at least one operand is unique).
# Either variant is accepted: the test does not pin which operand
# the optimizer chooses to reuse.
inplace = (
"_BINARY_OP_ADD_FLOAT_INPLACE" in uops
or "_BINARY_OP_ADD_FLOAT_INPLACE_RIGHT" in uops
)
self.assertTrue(inplace, "Expected inplace add for unique sub-results")
def test_float_truediv_non_float_type_no_crash(self):
# Fraction / Fraction goes through _BINARY_OP with NB_TRUE_DIVIDE
# but returns Fraction, not float. The optimizer must not assume
# the result is float for non-int/float operands. See gh-146306.
from fractions import Fraction
def testfunc(args):
a, b, n = args
total = Fraction(0)
for _ in range(n):
total += a / b
return float(total)
res, ex = self._run_with_optimizer(testfunc, (Fraction(10), Fraction(3), TIER2_THRESHOLD))
# Only the numeric result is checked; no assertion is made about the
# executor or the uops it contains.
expected = float(TIER2_THRESHOLD * Fraction(10, 3))
self.assertAlmostEqual(res, expected)
def test_float_truediv_mixed_float_fraction_no_crash(self):
# float / Fraction: lhs is known float from a prior guard,
# but rhs is Fraction. The guard insertion for rhs should
# deopt cleanly at runtime, not crash.
from fractions import Fraction
def testfunc(args):
a, b, c, n = args
total = 0.0
for _ in range(n):
total += (a + b) / c # (a+b) is float, c is Fraction
return total
res, ex = self._run_with_optimizer(testfunc, (2.0, 3.0, Fraction(4), TIER2_THRESHOLD))
# Only the numeric result is checked; no assertion is made about the
# executor or the uops it contains.
expected = TIER2_THRESHOLD * (5.0 / Fraction(4))
self.assertAlmostEqual(res, float(expected))
def test_int_add_inplace_unique_lhs(self):
# a * b produces a unique compact int; adding c reuses it in place
def testfunc(args):
@@ -0,0 +1,2 @@
Specialize float true division in the tier 2 optimizer, mutating
uniquely referenced operands in place.
+1
View File
@@ -367,6 +367,7 @@
if (res_o == NULL) {
JUMP_TO_LABEL(error);
}
assert(!PyFloat_CheckExact(res_o) || Py_REFCNT(res_o) == 1);
res = PyStackRef_FromPyObjectSteal(res_o);
l = left;
r = right;
+53 -1
View File
@@ -895,6 +895,53 @@ dummy_func(
INPUTS_DEAD();
}
// Float true division — not specialized at tier 1, emitted by the
// tier 2 optimizer when both operands are known floats.
//
// Stack effect: (left, right -- res, l, r). `res` is a newly allocated
// float holding left / right; `l` and `r` are the two operands, passed
// through unchanged.
// Errors out without popping the inputs when the divisor compares equal
// to 0.0 (ZeroDivisionError) or when allocating the result fails.
tier2 op(_BINARY_OP_TRUEDIV_FLOAT, (left, right -- res, l, r)) {
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
// Exact-float operands are a precondition; only checked in debug builds.
assert(PyFloat_CheckExact(left_o));
assert(PyFloat_CheckExact(right_o));
STAT_INC(BINARY_OP, hit);
double divisor = ((PyFloatObject *)right_o)->ob_fval;
// Check the divisor before dividing so that a Python-level exception is
// raised instead of producing an inf/nan result.
if (divisor == 0.0) {
PyErr_SetString(PyExc_ZeroDivisionError,
"float division by zero");
ERROR_NO_POP();
}
double dres = ((PyFloatObject *)left_o)->ob_fval / divisor;
PyObject *d = PyFloat_FromDouble(dres);
if (d == NULL) {
ERROR_NO_POP();
}
// The new float's reference is transferred into the stack ref.
res = PyStackRef_FromPyObjectSteal(d);
l = left;
r = right;
INPUTS_DEAD();
}
// Float true division that stores the quotient into the LEFT operand's
// float object instead of allocating a new one.
// Stack effect: (left, right -- res, l, r) with res == left, l == NULL
// (ownership of left moved into res) and r == right.
tier2 op(_BINARY_OP_TRUEDIV_FLOAT_INPLACE, (left, right -- res, l, r)) {
// Declares _divop_err and sets it on division by zero (see the
// FLOAT_INPLACE_DIVOP definition).
FLOAT_INPLACE_DIVOP(left, right, left);
if (_divop_err) {
ERROR_NO_POP();
}
res = left;
l = PyStackRef_NULL;
r = right;
INPUTS_DEAD();
}
// Float true division that stores the quotient into the RIGHT operand's
// float object instead of allocating a new one.
// Stack effect: (left, right -- res, l, r) with res == right, l == left
// and r == NULL (ownership of right moved into res).
tier2 op(_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT, (left, right -- res, l, r)) {
// Declares _divop_err and sets it on division by zero (see the
// FLOAT_INPLACE_DIVOP definition).
FLOAT_INPLACE_DIVOP(left, right, right);
if (_divop_err) {
ERROR_NO_POP();
}
res = right;
l = left;
r = PyStackRef_NULL;
INPUTS_DEAD();
}
pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res, l, r)) {
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
@@ -983,6 +1030,11 @@ dummy_func(
if (res_o == NULL) {
ERROR_NO_POP();
}
// The JIT and tier 2 optimizer assume that float results from
// binary operations are always uniquely referenced (refcount == 1).
// If this assertion fails, update the optimizer to stop marking
// float results as unique in optimizer_bytecodes.c.
assert(!PyFloat_CheckExact(res_o) || Py_REFCNT(res_o) == 1);
res = PyStackRef_FromPyObjectSteal(res_o);
l = left;
r = right;
@@ -5673,7 +5725,7 @@ dummy_func(
DEAD(rhs);
}
macro(BINARY_OP) = _SPECIALIZE_BINARY_OP + unused/4 + _BINARY_OP + POP_TOP + POP_TOP;
macro(BINARY_OP) = _SPECIALIZE_BINARY_OP + _RECORD_TOS + _RECORD_NOS + unused/4 + _BINARY_OP + POP_TOP + POP_TOP;
pure replicate(2:4) inst(SWAP, (bottom, unused[oparg-2], top --
bottom, unused[oparg-2], top)) {
+24 -1
View File
@@ -563,6 +563,30 @@ gen_try_set_executing(PyGenObject *gen)
->ob_fval = _dres; \
} while (0)
// In-place float true division: computes left / right and writes the
// quotient into the float object behind TARGET (which the callers pass as
// either `left` or `right`), so no new float is allocated.
//
// The macro declares `int _divop_err` in the enclosing scope. It stays 0 on
// success. When the divisor compares equal to 0.0, ZeroDivisionError is set,
// _divop_err becomes 1 and TARGET is left unmodified. The caller must test
// _divop_err afterwards and call ERROR_NO_POP() when it is set.
#define FLOAT_INPLACE_DIVOP(left, right, TARGET)                          \
    int _divop_err = 0;                                                   \
    do {                                                                  \
        PyObject *_dividend_o = PyStackRef_AsPyObjectBorrow(left);        \
        PyObject *_divisor_o = PyStackRef_AsPyObjectBorrow(right);        \
        PyFloatObject *_target_f =                                        \
            (PyFloatObject *)PyStackRef_AsPyObjectBorrow(TARGET);         \
        assert(PyFloat_CheckExact(_dividend_o));                          \
        assert(PyFloat_CheckExact(_divisor_o));                           \
        assert(_PyObject_IsUniquelyReferenced((PyObject *)_target_f));    \
        STAT_INC(BINARY_OP, hit);                                         \
        double _denom = ((PyFloatObject *)_divisor_o)->ob_fval;           \
        if (_denom != 0.0) {                                              \
            _target_f->ob_fval =                                          \
                ((PyFloatObject *)_dividend_o)->ob_fval / _denom;         \
        }                                                                 \
        else {                                                            \
            PyErr_SetString(PyExc_ZeroDivisionError,                      \
                            "float division by zero");                    \
            _divop_err = 1;                                               \
        }                                                                 \
    } while (0)
// Inplace compact int operation. TARGET is expected to be uniquely
// referenced at the optimizer level, but at runtime it may be a
// cached small int singleton. We check _Py_IsImmortal on TARGET
@@ -604,4 +628,3 @@ gen_try_set_executing(PyGenObject *gen)
(PyLongObject *)PyStackRef_AsPyObjectBorrow(left), \
(PyLongObject *)PyStackRef_AsPyObjectBorrow(right)); \
}
+236
View File
@@ -6001,6 +6001,241 @@
break;
}
// Float true division, allocating a new float for the result.
// Entered with two cached values (left in _tos_cache0, right in
// _tos_cache1); leaves three cached values (res, l, r) on success.
case _BINARY_OP_TRUEDIV_FLOAT_r23: {
CHECK_CURRENT_CACHED_VALUES(2);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef right;
_PyStackRef left;
_PyStackRef res;
_PyStackRef l;
_PyStackRef r;
_PyStackRef _stack_item_0 = _tos_cache0;
_PyStackRef _stack_item_1 = _tos_cache1;
right = _stack_item_1;
left = _stack_item_0;
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
assert(PyFloat_CheckExact(left_o));
assert(PyFloat_CheckExact(right_o));
STAT_INC(BINARY_OP, hit);
double divisor = ((PyFloatObject *)right_o)->ob_fval;
if (divisor == 0.0) {
// Spill both cached operands to the in-memory stack and publish the
// stack pointer to the frame before calling into the C API.
stack_pointer[0] = left;
stack_pointer[1] = right;
stack_pointer += 2;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
_PyFrame_SetStackPointer(frame, stack_pointer);
PyErr_SetString(PyExc_ZeroDivisionError,
"float division by zero");
stack_pointer = _PyFrame_GetStackPointer(frame);
SET_CURRENT_CACHED_VALUES(0);
JUMP_TO_ERROR();
}
double dres = ((PyFloatObject *)left_o)->ob_fval / divisor;
PyObject *d = PyFloat_FromDouble(dres);
if (d == NULL) {
// Allocation failed: spill the operands so the error path sees them
// on the in-memory stack with no cached values outstanding.
stack_pointer[0] = left;
stack_pointer[1] = right;
stack_pointer += 2;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
SET_CURRENT_CACHED_VALUES(0);
JUMP_TO_ERROR();
}
res = PyStackRef_FromPyObjectSteal(d);
l = left;
r = right;
// Outputs stay in the cache slots; the in-memory stack is untouched.
_tos_cache2 = r;
_tos_cache1 = l;
_tos_cache0 = res;
SET_CURRENT_CACHED_VALUES(3);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
// In-place float true division reusing the LEFT operand's float object.
// Entered with no cached values (both operands are read from the
// in-memory stack); leaves three cached values (res, l, r) on success.
case _BINARY_OP_TRUEDIV_FLOAT_INPLACE_r03: {
CHECK_CURRENT_CACHED_VALUES(0);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef right;
_PyStackRef left;
_PyStackRef res;
_PyStackRef l;
_PyStackRef r;
right = stack_pointer[-1];
left = stack_pointer[-2];
FLOAT_INPLACE_DIVOP(left, right, left);
if (_divop_err) {
// Operands are still on the in-memory stack, so nothing to spill.
SET_CURRENT_CACHED_VALUES(0);
JUMP_TO_ERROR();
}
res = left;
l = PyStackRef_NULL;
r = right;
_tos_cache2 = r;
_tos_cache1 = l;
_tos_cache0 = res;
SET_CURRENT_CACHED_VALUES(3);
// Both inputs have moved into the cache; drop them from the memory stack.
stack_pointer += -2;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
// In-place float true division reusing the LEFT operand's float object.
// Entered with one cached value (right in _tos_cache0; left is read from
// the in-memory stack); leaves three cached values (res, l, r) on success.
case _BINARY_OP_TRUEDIV_FLOAT_INPLACE_r13: {
CHECK_CURRENT_CACHED_VALUES(1);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef right;
_PyStackRef left;
_PyStackRef res;
_PyStackRef l;
_PyStackRef r;
_PyStackRef _stack_item_0 = _tos_cache0;
right = _stack_item_0;
left = stack_pointer[-1];
FLOAT_INPLACE_DIVOP(left, right, left);
if (_divop_err) {
// Push the cached right operand back onto the in-memory stack before
// jumping to the error handler.
stack_pointer[0] = right;
stack_pointer += 1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
SET_CURRENT_CACHED_VALUES(0);
JUMP_TO_ERROR();
}
res = left;
l = PyStackRef_NULL;
r = right;
_tos_cache2 = r;
_tos_cache1 = l;
_tos_cache0 = res;
SET_CURRENT_CACHED_VALUES(3);
// left has moved into the cache; drop it from the memory stack.
stack_pointer += -1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
// In-place float true division reusing the LEFT operand's float object.
// Entered with two cached values (left in _tos_cache0, right in
// _tos_cache1); leaves three cached values (res, l, r) on success.
case _BINARY_OP_TRUEDIV_FLOAT_INPLACE_r23: {
CHECK_CURRENT_CACHED_VALUES(2);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef right;
_PyStackRef left;
_PyStackRef res;
_PyStackRef l;
_PyStackRef r;
_PyStackRef _stack_item_0 = _tos_cache0;
_PyStackRef _stack_item_1 = _tos_cache1;
right = _stack_item_1;
left = _stack_item_0;
FLOAT_INPLACE_DIVOP(left, right, left);
if (_divop_err) {
// Spill both cached operands to the in-memory stack before jumping
// to the error handler.
stack_pointer[0] = left;
stack_pointer[1] = right;
stack_pointer += 2;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
SET_CURRENT_CACHED_VALUES(0);
JUMP_TO_ERROR();
}
res = left;
l = PyStackRef_NULL;
r = right;
// Outputs stay in the cache slots; the in-memory stack is untouched.
_tos_cache2 = r;
_tos_cache1 = l;
_tos_cache0 = res;
SET_CURRENT_CACHED_VALUES(3);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
// In-place float true division reusing the RIGHT operand's float object.
// Entered with no cached values (both operands are read from the
// in-memory stack); leaves three cached values (res, l, r) on success.
case _BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT_r03: {
CHECK_CURRENT_CACHED_VALUES(0);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef right;
_PyStackRef left;
_PyStackRef res;
_PyStackRef l;
_PyStackRef r;
right = stack_pointer[-1];
left = stack_pointer[-2];
FLOAT_INPLACE_DIVOP(left, right, right);
if (_divop_err) {
// Operands are still on the in-memory stack, so nothing to spill.
SET_CURRENT_CACHED_VALUES(0);
JUMP_TO_ERROR();
}
res = right;
l = left;
r = PyStackRef_NULL;
_tos_cache2 = r;
_tos_cache1 = l;
_tos_cache0 = res;
SET_CURRENT_CACHED_VALUES(3);
// Both inputs have moved into the cache; drop them from the memory stack.
stack_pointer += -2;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
// In-place float true division reusing the RIGHT operand's float object.
// Entered with one cached value (right in _tos_cache0; left is read from
// the in-memory stack); leaves three cached values (res, l, r) on success.
case _BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT_r13: {
CHECK_CURRENT_CACHED_VALUES(1);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef right;
_PyStackRef left;
_PyStackRef res;
_PyStackRef l;
_PyStackRef r;
_PyStackRef _stack_item_0 = _tos_cache0;
right = _stack_item_0;
left = stack_pointer[-1];
FLOAT_INPLACE_DIVOP(left, right, right);
if (_divop_err) {
// Push the cached right operand back onto the in-memory stack before
// jumping to the error handler.
stack_pointer[0] = right;
stack_pointer += 1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
SET_CURRENT_CACHED_VALUES(0);
JUMP_TO_ERROR();
}
res = right;
l = left;
r = PyStackRef_NULL;
_tos_cache2 = r;
_tos_cache1 = l;
_tos_cache0 = res;
SET_CURRENT_CACHED_VALUES(3);
// left has moved into the cache; drop it from the memory stack.
stack_pointer += -1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
// In-place float true division reusing the RIGHT operand's float object.
// Entered with two cached values (left in _tos_cache0, right in
// _tos_cache1); leaves three cached values (res, l, r) on success.
case _BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT_r23: {
CHECK_CURRENT_CACHED_VALUES(2);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef right;
_PyStackRef left;
_PyStackRef res;
_PyStackRef l;
_PyStackRef r;
_PyStackRef _stack_item_0 = _tos_cache0;
_PyStackRef _stack_item_1 = _tos_cache1;
right = _stack_item_1;
left = _stack_item_0;
FLOAT_INPLACE_DIVOP(left, right, right);
if (_divop_err) {
// Spill both cached operands to the in-memory stack before jumping
// to the error handler.
stack_pointer[0] = left;
stack_pointer[1] = right;
stack_pointer += 2;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
SET_CURRENT_CACHED_VALUES(0);
JUMP_TO_ERROR();
}
res = right;
l = left;
r = PyStackRef_NULL;
// Outputs stay in the cache slots; the in-memory stack is untouched.
_tos_cache2 = r;
_tos_cache1 = l;
_tos_cache0 = res;
SET_CURRENT_CACHED_VALUES(3);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
case _BINARY_OP_ADD_UNICODE_r03: {
CHECK_CURRENT_CACHED_VALUES(0);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
@@ -6230,6 +6465,7 @@
SET_CURRENT_CACHED_VALUES(0);
JUMP_TO_ERROR();
}
assert(!PyFloat_CheckExact(res_o) || Py_REFCNT(res_o) == 1);
res = PyStackRef_FromPyObjectSteal(res_o);
l = left;
r = right;
+1
View File
@@ -367,6 +367,7 @@
if (res_o == NULL) {
JUMP_TO_LABEL(error);
}
assert(!PyFloat_CheckExact(res_o) || Py_REFCNT(res_o) == 1);
res = PyStackRef_FromPyObjectSteal(res_o);
l = left;
r = right;
+53 -7
View File
@@ -289,7 +289,56 @@ dummy_func(void) {
bool rhs_int = sym_matches_type(rhs, &PyLong_Type);
bool lhs_float = sym_matches_type(lhs, &PyFloat_Type);
bool rhs_float = sym_matches_type(rhs, &PyFloat_Type);
if (!((lhs_int || lhs_float) && (rhs_int || rhs_float))) {
bool is_truediv = (oparg == NB_TRUE_DIVIDE
|| oparg == NB_INPLACE_TRUE_DIVIDE);
bool is_remainder = (oparg == NB_REMAINDER
|| oparg == NB_INPLACE_REMAINDER);
// Promote probable-float operands to known floats via speculative
// guards. _RECORD_TOS / _RECORD_NOS in the BINARY_OP macro record
// the observed operand during tracing, which sym_get_probable_type
// reads here. Applied only to ops where narrowing unlocks a
// meaningful downstream win:
// - NB_TRUE_DIVIDE: enables the specialized float path below.
// - NB_REMAINDER: lets the float result type propagate.
// NB_POWER is excluded — speculative guards there regressed
// test_power_type_depends_on_input_values (GH-127844).
if (is_truediv || is_remainder) {
if (!sym_has_type(rhs)
&& sym_get_probable_type(rhs) == &PyFloat_Type) {
ADD_OP(_GUARD_TOS_FLOAT, 0, 0);
sym_set_type(rhs, &PyFloat_Type);
rhs_float = true;
}
if (!sym_has_type(lhs)
&& sym_get_probable_type(lhs) == &PyFloat_Type) {
ADD_OP(_GUARD_NOS_FLOAT, 0, 0);
sym_set_type(lhs, &PyFloat_Type);
lhs_float = true;
}
}
if (is_truediv && lhs_float && rhs_float) {
if (PyJitRef_IsUnique(lhs)) {
ADD_OP(_BINARY_OP_TRUEDIV_FLOAT_INPLACE, 0, 0);
l = sym_new_null(ctx);
r = rhs;
}
else if (PyJitRef_IsUnique(rhs)) {
ADD_OP(_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT, 0, 0);
l = lhs;
r = sym_new_null(ctx);
}
else {
ADD_OP(_BINARY_OP_TRUEDIV_FLOAT, 0, 0);
l = lhs;
r = rhs;
}
res = PyJitRef_MakeUnique(sym_new_type(ctx, &PyFloat_Type));
}
else if (is_truediv
&& (lhs_int || lhs_float) && (rhs_int || rhs_float)) {
res = PyJitRef_MakeUnique(sym_new_type(ctx, &PyFloat_Type));
}
else if (!((lhs_int || lhs_float) && (rhs_int || rhs_float))) {
// There's something other than an int or float involved:
res = sym_new_unknown(ctx);
}
@@ -312,7 +361,7 @@ dummy_func(void) {
}
else if (lhs_float) {
// Case C:
res = sym_new_type(ctx, &PyFloat_Type);
res = PyJitRef_MakeUnique(sym_new_type(ctx, &PyFloat_Type));
}
else if (!sym_is_const(ctx, rhs)) {
// Case A or B... can't know without the sign of the RHS:
@@ -320,21 +369,18 @@ dummy_func(void) {
}
else if (_PyLong_IsNegative((PyLongObject *)sym_get_const(ctx, rhs))) {
// Case B:
res = sym_new_type(ctx, &PyFloat_Type);
res = PyJitRef_MakeUnique(sym_new_type(ctx, &PyFloat_Type));
}
else {
// Case A:
res = sym_new_type(ctx, &PyLong_Type);
}
}
else if (oparg == NB_TRUE_DIVIDE || oparg == NB_INPLACE_TRUE_DIVIDE) {
res = sym_new_type(ctx, &PyFloat_Type);
}
else if (lhs_int && rhs_int) {
res = sym_new_type(ctx, &PyLong_Type);
}
else {
res = sym_new_type(ctx, &PyFloat_Type);
res = PyJitRef_MakeUnique(sym_new_type(ctx, &PyFloat_Type));
}
}
+92 -7
View File
@@ -1113,6 +1113,54 @@
break;
}
// Abstract stack effect of _BINARY_OP_TRUEDIV_FLOAT: the two inputs are
// replaced by three outputs (res, l, r), a net growth of one slot.
// All three outputs are modelled only as "not NULL"; no float type is
// recorded for the result in this case.
case _BINARY_OP_TRUEDIV_FLOAT: {
JitOptRef res;
JitOptRef l;
JitOptRef r;
res = sym_new_not_null(ctx);
l = sym_new_not_null(ctx);
r = sym_new_not_null(ctx);
CHECK_STACK_BOUNDS(1);
// res and l overwrite the two input slots; r takes the new top slot.
stack_pointer[-2] = res;
stack_pointer[-1] = l;
stack_pointer[0] = r;
stack_pointer += 1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
break;
}
// Abstract stack effect of _BINARY_OP_TRUEDIV_FLOAT_INPLACE: the two
// inputs are replaced by three outputs (res, l, r), a net growth of one
// slot. All three outputs are modelled only as "not NULL" here.
// NOTE(review): the executor sets l to PyStackRef_NULL for this uop, while
// this case models l as not-null; confirm this case is only a fallback.
case _BINARY_OP_TRUEDIV_FLOAT_INPLACE: {
JitOptRef res;
JitOptRef l;
JitOptRef r;
res = sym_new_not_null(ctx);
l = sym_new_not_null(ctx);
r = sym_new_not_null(ctx);
CHECK_STACK_BOUNDS(1);
// res and l overwrite the two input slots; r takes the new top slot.
stack_pointer[-2] = res;
stack_pointer[-1] = l;
stack_pointer[0] = r;
stack_pointer += 1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
break;
}
// Abstract stack effect of _BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT: the two
// inputs are replaced by three outputs (res, l, r), a net growth of one
// slot. All three outputs are modelled only as "not NULL" here.
// NOTE(review): the executor sets r to PyStackRef_NULL for this uop, while
// this case models r as not-null; confirm this case is only a fallback.
case _BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT: {
JitOptRef res;
JitOptRef l;
JitOptRef r;
res = sym_new_not_null(ctx);
l = sym_new_not_null(ctx);
r = sym_new_not_null(ctx);
CHECK_STACK_BOUNDS(1);
// res and l overwrite the two input slots; r takes the new top slot.
stack_pointer[-2] = res;
stack_pointer[-1] = l;
stack_pointer[0] = r;
stack_pointer += 1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
break;
}
case _BINARY_OP_ADD_UNICODE: {
JitOptRef right;
JitOptRef left;
@@ -5007,7 +5055,47 @@
bool rhs_int = sym_matches_type(rhs, &PyLong_Type);
bool lhs_float = sym_matches_type(lhs, &PyFloat_Type);
bool rhs_float = sym_matches_type(rhs, &PyFloat_Type);
if (!((lhs_int || lhs_float) && (rhs_int || rhs_float))) {
bool is_truediv = (oparg == NB_TRUE_DIVIDE
|| oparg == NB_INPLACE_TRUE_DIVIDE);
bool is_remainder = (oparg == NB_REMAINDER
|| oparg == NB_INPLACE_REMAINDER);
if (is_truediv || is_remainder) {
if (!sym_has_type(rhs)
&& sym_get_probable_type(rhs) == &PyFloat_Type) {
ADD_OP(_GUARD_TOS_FLOAT, 0, 0);
sym_set_type(rhs, &PyFloat_Type);
rhs_float = true;
}
if (!sym_has_type(lhs)
&& sym_get_probable_type(lhs) == &PyFloat_Type) {
ADD_OP(_GUARD_NOS_FLOAT, 0, 0);
sym_set_type(lhs, &PyFloat_Type);
lhs_float = true;
}
}
if (is_truediv && lhs_float && rhs_float) {
if (PyJitRef_IsUnique(lhs)) {
ADD_OP(_BINARY_OP_TRUEDIV_FLOAT_INPLACE, 0, 0);
l = sym_new_null(ctx);
r = rhs;
}
else if (PyJitRef_IsUnique(rhs)) {
ADD_OP(_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT, 0, 0);
l = lhs;
r = sym_new_null(ctx);
}
else {
ADD_OP(_BINARY_OP_TRUEDIV_FLOAT, 0, 0);
l = lhs;
r = rhs;
}
res = PyJitRef_MakeUnique(sym_new_type(ctx, &PyFloat_Type));
}
else if (is_truediv
&& (lhs_int || lhs_float) && (rhs_int || rhs_float)) {
res = PyJitRef_MakeUnique(sym_new_type(ctx, &PyFloat_Type));
}
else if (!((lhs_int || lhs_float) && (rhs_int || rhs_float))) {
res = sym_new_unknown(ctx);
}
else if (oparg == NB_POWER || oparg == NB_INPLACE_POWER) {
@@ -5015,26 +5103,23 @@
res = sym_new_unknown(ctx);
}
else if (lhs_float) {
res = sym_new_type(ctx, &PyFloat_Type);
res = PyJitRef_MakeUnique(sym_new_type(ctx, &PyFloat_Type));
}
else if (!sym_is_const(ctx, rhs)) {
res = sym_new_unknown(ctx);
}
else if (_PyLong_IsNegative((PyLongObject *)sym_get_const(ctx, rhs))) {
res = sym_new_type(ctx, &PyFloat_Type);
res = PyJitRef_MakeUnique(sym_new_type(ctx, &PyFloat_Type));
}
else {
res = sym_new_type(ctx, &PyLong_Type);
}
}
else if (oparg == NB_TRUE_DIVIDE || oparg == NB_INPLACE_TRUE_DIVIDE) {
res = sym_new_type(ctx, &PyFloat_Type);
}
else if (lhs_int && rhs_int) {
res = sym_new_type(ctx, &PyLong_Type);
}
else {
res = sym_new_type(ctx, &PyFloat_Type);
res = PyJitRef_MakeUnique(sym_new_type(ctx, &PyFloat_Type));
}
CHECK_STACK_BOUNDS(1);
stack_pointer[-2] = res;
+4 -1
View File
@@ -99,6 +99,7 @@ void _PyOpcode_RecordFunction_CODE(_PyInterpreterFrame *frame, _PyStackRef *stac
#define _RECORD_BOUND_METHOD_INDEX 6
#define _RECORD_CALLABLE_KW_INDEX 7
#define _RECORD_4OS_INDEX 8
#define _RECORD_TOS_INDEX 9
const _PyOpcodeRecordEntry _PyOpcode_RecordEntries[256] = {
[TO_BOOL_ALWAYS_TRUE] = {1, {_RECORD_TOS_TYPE_INDEX}},
@@ -137,9 +138,10 @@ const _PyOpcodeRecordEntry _PyOpcode_RecordEntries[256] = {
[CALL_KW_PY] = {1, {_RECORD_CALLABLE_KW_INDEX}},
[CALL_KW_BOUND_METHOD] = {1, {_RECORD_CALLABLE_KW_INDEX}},
[CALL_EX_PY] = {1, {_RECORD_4OS_INDEX}},
[BINARY_OP] = {2, {_RECORD_TOS_INDEX, _RECORD_NOS_INDEX}},
};
const _Py_RecordFuncPtr _PyOpcode_RecordFunctions[9] = {
const _Py_RecordFuncPtr _PyOpcode_RecordFunctions[10] = {
[0] = NULL,
[_RECORD_TOS_TYPE_INDEX] = _PyOpcode_RecordFunction_TOS_TYPE,
[_RECORD_NOS_INDEX] = _PyOpcode_RecordFunction_NOS,
@@ -149,4 +151,5 @@ const _Py_RecordFuncPtr _PyOpcode_RecordFunctions[9] = {
[_RECORD_BOUND_METHOD_INDEX] = _PyOpcode_RecordFunction_BOUND_METHOD,
[_RECORD_CALLABLE_KW_INDEX] = _PyOpcode_RecordFunction_CALLABLE_KW,
[_RECORD_4OS_INDEX] = _PyOpcode_RecordFunction_4OS,
[_RECORD_TOS_INDEX] = _PyOpcode_RecordFunction_TOS,
};