mirror of
https://github.com/python/cpython.git
synced 2026-05-06 04:37:33 -04:00
gh-148378: Allow multiple consecutive recording ops per macro op (GH-148496)
This commit is contained in:
@@ -91,13 +91,15 @@ typedef struct _PyJitTracerInitialState {
|
||||
_Py_CODEUNIT *jump_backward_instr;
|
||||
} _PyJitTracerInitialState;
|
||||
|
||||
#define MAX_RECORDED_VALUES 3
|
||||
typedef struct _PyJitTracerPreviousState {
|
||||
int instr_oparg;
|
||||
int instr_stacklevel;
|
||||
_Py_CODEUNIT *instr;
|
||||
PyCodeObject *instr_code; // Strong
|
||||
struct _PyInterpreterFrame *instr_frame;
|
||||
PyObject *recorded_value; // Strong, may be NULL
|
||||
PyObject *recorded_values[MAX_RECORDED_VALUES]; // Strong, may be NULL
|
||||
int recorded_count;
|
||||
} _PyJitTracerPreviousState;
|
||||
|
||||
typedef struct _PyJitTracerTranslatorState {
|
||||
@@ -481,7 +483,12 @@ void _PyJit_TracerFree(_PyThreadStateImpl *_tstate);
|
||||
#ifdef _Py_TIER2
|
||||
typedef void (*_Py_RecordFuncPtr)(_PyInterpreterFrame *frame, _PyStackRef *stackpointer, int oparg, PyObject **recorded_value);
|
||||
PyAPI_DATA(const _Py_RecordFuncPtr) _PyOpcode_RecordFunctions[];
|
||||
PyAPI_DATA(const uint8_t) _PyOpcode_RecordFunctionIndices[256];
|
||||
|
||||
typedef struct {
|
||||
uint8_t count;
|
||||
uint8_t indices[MAX_RECORDED_VALUES];
|
||||
} _PyOpcodeRecordEntry;
|
||||
PyAPI_DATA(const _PyOpcodeRecordEntry) _PyOpcode_RecordEntries[256];
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@@ -29,12 +29,13 @@ skip_if_different_mount_drives()
|
||||
|
||||
test_tools.skip_if_missing("cases_generator")
|
||||
with test_tools.imports_under_tool("cases_generator"):
|
||||
from analyzer import StackItem
|
||||
from analyzer import StackItem, analyze_files
|
||||
from cwriter import CWriter
|
||||
import parser
|
||||
from stack import Local, Stack
|
||||
import tier1_generator
|
||||
import optimizer_generator
|
||||
import record_function_generator
|
||||
|
||||
|
||||
def handle_stderr():
|
||||
@@ -1948,6 +1949,202 @@ class TestGeneratedCases(unittest.TestCase):
|
||||
with self.assertRaisesRegex(SyntaxError, "Recording uop"):
|
||||
self.run_cases_test(input, "")
|
||||
|
||||
def test_multiple_consecutive_recording_uops(self):
|
||||
"""Multiple consecutive recording uops at the start of a macro are legal."""
|
||||
input = """
|
||||
tier2 op(_RECORD_A, (a, b -- a, b)) {
|
||||
RECORD_VALUE(a);
|
||||
}
|
||||
tier2 op(_RECORD_B, (a, b -- a, b)) {
|
||||
RECORD_VALUE(b);
|
||||
}
|
||||
op(_DO_STUFF, (a, b -- res)) {
|
||||
res = a;
|
||||
INPUTS_DEAD();
|
||||
}
|
||||
macro(OP) = _RECORD_A + _RECORD_B + _DO_STUFF;
|
||||
"""
|
||||
output = """
|
||||
TARGET(OP) {
|
||||
#if _Py_TAIL_CALL_INTERP
|
||||
int opcode = OP;
|
||||
(void)(opcode);
|
||||
#endif
|
||||
frame->instr_ptr = next_instr;
|
||||
next_instr += 1;
|
||||
INSTRUCTION_STATS(OP);
|
||||
_PyStackRef a;
|
||||
_PyStackRef res;
|
||||
// _DO_STUFF
|
||||
{
|
||||
a = stack_pointer[-2];
|
||||
res = a;
|
||||
}
|
||||
stack_pointer[-2] = res;
|
||||
stack_pointer += -1;
|
||||
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
|
||||
DISPATCH();
|
||||
}
|
||||
"""
|
||||
self.run_cases_test(input, output)
|
||||
|
||||
def test_multiple_recording_uops_after_specializing(self):
|
||||
"""Multiple recording uops after a specializing uop are legal."""
|
||||
input = """
|
||||
specializing op(_SPECIALIZE_OP, (counter/1, a, b -- a, b)) {
|
||||
SPAM();
|
||||
}
|
||||
tier2 op(_RECORD_A, (a, b -- a, b)) {
|
||||
RECORD_VALUE(a);
|
||||
}
|
||||
tier2 op(_RECORD_B, (a, b -- a, b)) {
|
||||
RECORD_VALUE(b);
|
||||
}
|
||||
op(_DO_STUFF, (a, b -- res)) {
|
||||
res = a;
|
||||
INPUTS_DEAD();
|
||||
}
|
||||
macro(OP) = _SPECIALIZE_OP + _RECORD_A + _RECORD_B + unused/2 + _DO_STUFF;
|
||||
"""
|
||||
output = """
|
||||
TARGET(OP) {
|
||||
#if _Py_TAIL_CALL_INTERP
|
||||
int opcode = OP;
|
||||
(void)(opcode);
|
||||
#endif
|
||||
_Py_CODEUNIT* const this_instr = next_instr;
|
||||
(void)this_instr;
|
||||
frame->instr_ptr = next_instr;
|
||||
next_instr += 4;
|
||||
INSTRUCTION_STATS(OP);
|
||||
_PyStackRef a;
|
||||
_PyStackRef res;
|
||||
// _SPECIALIZE_OP
|
||||
{
|
||||
uint16_t counter = read_u16(&this_instr[1].cache);
|
||||
(void)counter;
|
||||
SPAM();
|
||||
}
|
||||
/* Skip 2 cache entries */
|
||||
// _DO_STUFF
|
||||
{
|
||||
a = stack_pointer[-2];
|
||||
res = a;
|
||||
}
|
||||
stack_pointer[-2] = res;
|
||||
stack_pointer += -1;
|
||||
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
|
||||
DISPATCH();
|
||||
}
|
||||
"""
|
||||
self.run_cases_test(input, output)
|
||||
|
||||
def test_recording_uop_between_real_uops_rejected(self):
|
||||
"""A recording uop sandwiched between real uops is rejected."""
|
||||
input = """
|
||||
tier2 op(_RECORD_A, (a, b -- a, b)) {
|
||||
RECORD_VALUE(a);
|
||||
}
|
||||
op(_FIRST, (a, b -- a, b)) {
|
||||
first(a);
|
||||
}
|
||||
tier2 op(_RECORD_B, (a, b -- a, b)) {
|
||||
RECORD_VALUE(b);
|
||||
}
|
||||
macro(OP) = _RECORD_A + _FIRST + _RECORD_B;
|
||||
"""
|
||||
with self.assertRaisesRegex(SyntaxError,
|
||||
"must precede all "
|
||||
"non-recording, non-specializing uops"):
|
||||
self.run_cases_test(input, "")
|
||||
|
||||
|
||||
class TestRecorderTableGeneration(unittest.TestCase):
|
||||
|
||||
def setUp(self) -> None:
|
||||
super().setUp()
|
||||
self.maxDiff = None
|
||||
self.temp_dir = tempfile.gettempdir()
|
||||
self.temp_input_filename = os.path.join(self.temp_dir, "input.txt")
|
||||
|
||||
def tearDown(self) -> None:
|
||||
try:
|
||||
os.remove(self.temp_input_filename)
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
super().tearDown()
|
||||
|
||||
def generate_tables(self, input: str) -> str:
|
||||
import io
|
||||
with open(self.temp_input_filename, "w+") as f:
|
||||
f.write(parser.BEGIN_MARKER)
|
||||
f.write(input)
|
||||
f.write(parser.END_MARKER)
|
||||
with handle_stderr():
|
||||
analysis = analyze_files([self.temp_input_filename])
|
||||
buf = io.StringIO()
|
||||
out = CWriter(buf, 0, False)
|
||||
record_function_generator.generate_recorder_tables(analysis, out)
|
||||
return buf.getvalue()
|
||||
|
||||
def test_single_recording_uop_generates_count(self):
|
||||
input = """
|
||||
tier2 op(_RECORD_TOS, (value -- value)) {
|
||||
RECORD_VALUE(value);
|
||||
}
|
||||
op(_DO_STUFF, (value -- res)) {
|
||||
res = value;
|
||||
}
|
||||
macro(OP) = _RECORD_TOS + _DO_STUFF;
|
||||
"""
|
||||
output = self.generate_tables(input)
|
||||
self.assertIn("_RECORD_TOS_INDEX", output)
|
||||
self.assertIn("[OP] = {1, {_RECORD_TOS_INDEX}}", output)
|
||||
|
||||
def test_three_recording_uops_generate_count_3_in_order(self):
|
||||
input = """
|
||||
tier2 op(_RECORD_X, (a, b, c -- a, b, c)) {
|
||||
RECORD_VALUE(a);
|
||||
}
|
||||
tier2 op(_RECORD_Y, (a, b, c -- a, b, c)) {
|
||||
RECORD_VALUE(b);
|
||||
}
|
||||
tier2 op(_RECORD_Z, (a, b, c -- a, b, c)) {
|
||||
RECORD_VALUE(c);
|
||||
}
|
||||
op(_DO_STUFF, (a, b, c -- res)) {
|
||||
res = a;
|
||||
}
|
||||
macro(OP) = _RECORD_X + _RECORD_Y + _RECORD_Z + _DO_STUFF;
|
||||
"""
|
||||
output = self.generate_tables(input)
|
||||
self.assertIn(
|
||||
"[OP] = {3, {_RECORD_X_INDEX, _RECORD_Y_INDEX, _RECORD_Z_INDEX}}",
|
||||
output,
|
||||
)
|
||||
|
||||
def test_four_recording_uops_rejected(self):
|
||||
input = """
|
||||
tier2 op(_RECORD_A, (a, b, c, d -- a, b, c, d)) {
|
||||
RECORD_VALUE(a);
|
||||
}
|
||||
tier2 op(_RECORD_B, (a, b, c, d -- a, b, c, d)) {
|
||||
RECORD_VALUE(b);
|
||||
}
|
||||
tier2 op(_RECORD_C, (a, b, c, d -- a, b, c, d)) {
|
||||
RECORD_VALUE(c);
|
||||
}
|
||||
tier2 op(_RECORD_D, (a, b, c, d -- a, b, c, d)) {
|
||||
RECORD_VALUE(d);
|
||||
}
|
||||
op(_DO_STUFF, (a, b, c, d -- res)) {
|
||||
res = a;
|
||||
}
|
||||
macro(OP) = _RECORD_A + _RECORD_B + _RECORD_C + _RECORD_D + _DO_STUFF;
|
||||
"""
|
||||
with self.assertRaisesRegex(ValueError, "exceeds MAX_RECORDED_VALUES"):
|
||||
self.generate_tables(input)
|
||||
|
||||
|
||||
class TestGeneratedAbstractCases(unittest.TestCase):
|
||||
def setUp(self) -> None:
|
||||
|
||||
Generated
+11
-7
@@ -12317,9 +12317,12 @@
|
||||
}
|
||||
DISPATCH();
|
||||
}
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
Py_CLEAR(tracer->prev_state.recorded_value);
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
for (int i = 0; i < tracer->prev_state.recorded_count; i++) {
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
Py_CLEAR(tracer->prev_state.recorded_values[i]);
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
}
|
||||
tracer->prev_state.recorded_count = 0;
|
||||
tracer->prev_state.instr = next_instr;
|
||||
PyObject *prev_code = PyStackRef_AsPyObjectBorrow(frame->f_executable);
|
||||
if (tracer->prev_state.instr_code != (PyCodeObject *)prev_code) {
|
||||
@@ -12333,11 +12336,12 @@
|
||||
if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]) {
|
||||
(&next_instr[1])->counter = trigger_backoff_counter();
|
||||
}
|
||||
uint8_t record_func_index = _PyOpcode_RecordFunctionIndices[opcode];
|
||||
if (record_func_index) {
|
||||
_Py_RecordFuncPtr doesnt_escape = _PyOpcode_RecordFunctions[record_func_index];
|
||||
doesnt_escape(frame, stack_pointer, oparg, &tracer->prev_state.recorded_value);
|
||||
const _PyOpcodeRecordEntry *record_entry = &_PyOpcode_RecordEntries[opcode];
|
||||
for (int i = 0; i < record_entry->count; i++) {
|
||||
_Py_RecordFuncPtr doesnt_escape = _PyOpcode_RecordFunctions[record_entry->indices[i]];
|
||||
doesnt_escape(frame, stack_pointer, oparg, &tracer->prev_state.recorded_values[i]);
|
||||
}
|
||||
tracer->prev_state.recorded_count = record_entry->count;
|
||||
DISPATCH_GOTO_NON_TRACING();
|
||||
#else
|
||||
(void)prev_instr;
|
||||
|
||||
+9
-5
@@ -6349,7 +6349,10 @@ dummy_func(
|
||||
ERROR_IF(err < 0);
|
||||
DISPATCH();
|
||||
}
|
||||
Py_CLEAR(tracer->prev_state.recorded_value);
|
||||
for (int i = 0; i < tracer->prev_state.recorded_count; i++) {
|
||||
Py_CLEAR(tracer->prev_state.recorded_values[i]);
|
||||
}
|
||||
tracer->prev_state.recorded_count = 0;
|
||||
tracer->prev_state.instr = next_instr;
|
||||
PyObject *prev_code = PyStackRef_AsPyObjectBorrow(frame->f_executable);
|
||||
if (tracer->prev_state.instr_code != (PyCodeObject *)prev_code) {
|
||||
@@ -6363,11 +6366,12 @@ dummy_func(
|
||||
(&next_instr[1])->counter = trigger_backoff_counter();
|
||||
}
|
||||
|
||||
uint8_t record_func_index = _PyOpcode_RecordFunctionIndices[opcode];
|
||||
if (record_func_index) {
|
||||
_Py_RecordFuncPtr doesnt_escape = _PyOpcode_RecordFunctions[record_func_index];
|
||||
doesnt_escape(frame, stack_pointer, oparg, &tracer->prev_state.recorded_value);
|
||||
const _PyOpcodeRecordEntry *record_entry = &_PyOpcode_RecordEntries[opcode];
|
||||
for (int i = 0; i < record_entry->count; i++) {
|
||||
_Py_RecordFuncPtr doesnt_escape = _PyOpcode_RecordFunctions[record_entry->indices[i]];
|
||||
doesnt_escape(frame, stack_pointer, oparg, &tracer->prev_state.recorded_values[i]);
|
||||
}
|
||||
tracer->prev_state.recorded_count = record_entry->count;
|
||||
DISPATCH_GOTO_NON_TRACING();
|
||||
#else
|
||||
(void)prev_instr;
|
||||
|
||||
Generated
+11
-7
@@ -12314,9 +12314,12 @@
|
||||
}
|
||||
DISPATCH();
|
||||
}
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
Py_CLEAR(tracer->prev_state.recorded_value);
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
for (int i = 0; i < tracer->prev_state.recorded_count; i++) {
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
Py_CLEAR(tracer->prev_state.recorded_values[i]);
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
}
|
||||
tracer->prev_state.recorded_count = 0;
|
||||
tracer->prev_state.instr = next_instr;
|
||||
PyObject *prev_code = PyStackRef_AsPyObjectBorrow(frame->f_executable);
|
||||
if (tracer->prev_state.instr_code != (PyCodeObject *)prev_code) {
|
||||
@@ -12330,11 +12333,12 @@
|
||||
if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]) {
|
||||
(&next_instr[1])->counter = trigger_backoff_counter();
|
||||
}
|
||||
uint8_t record_func_index = _PyOpcode_RecordFunctionIndices[opcode];
|
||||
if (record_func_index) {
|
||||
_Py_RecordFuncPtr doesnt_escape = _PyOpcode_RecordFunctions[record_func_index];
|
||||
doesnt_escape(frame, stack_pointer, oparg, &tracer->prev_state.recorded_value);
|
||||
const _PyOpcodeRecordEntry *record_entry = &_PyOpcode_RecordEntries[opcode];
|
||||
for (int i = 0; i < record_entry->count; i++) {
|
||||
_Py_RecordFuncPtr doesnt_escape = _PyOpcode_RecordFunctions[record_entry->indices[i]];
|
||||
doesnt_escape(frame, stack_pointer, oparg, &tracer->prev_state.recorded_values[i]);
|
||||
}
|
||||
tracer->prev_state.recorded_count = record_entry->count;
|
||||
DISPATCH_GOTO_NON_TRACING();
|
||||
#else
|
||||
(void)prev_instr;
|
||||
|
||||
+17
-8
@@ -866,6 +866,7 @@ _PyJit_translate_single_bytecode_to_trace(
|
||||
assert(nuops > 0);
|
||||
uint32_t orig_oparg = oparg; // For OPARG_TOP/BOTTOM
|
||||
uint32_t orig_target = target;
|
||||
int record_idx = 0;
|
||||
for (int i = 0; i < nuops; i++) {
|
||||
oparg = orig_oparg;
|
||||
target = orig_target;
|
||||
@@ -946,8 +947,9 @@ _PyJit_translate_single_bytecode_to_trace(
|
||||
operand = next->op.arg;
|
||||
}
|
||||
else if (_PyUop_Flags[uop] & HAS_RECORDS_VALUE_FLAG) {
|
||||
PyObject *recorded_value = tracer->prev_state.recorded_value;
|
||||
tracer->prev_state.recorded_value = NULL;
|
||||
PyObject *recorded_value = tracer->prev_state.recorded_values[record_idx];
|
||||
tracer->prev_state.recorded_values[record_idx] = NULL;
|
||||
record_idx++;
|
||||
operand = (uintptr_t)recorded_value;
|
||||
}
|
||||
// All other instructions
|
||||
@@ -1060,12 +1062,16 @@ _PyJit_TryInitializeTracing(
|
||||
tracer->prev_state.instr_frame = frame;
|
||||
tracer->prev_state.instr_oparg = oparg;
|
||||
tracer->prev_state.instr_stacklevel = tracer->initial_state.stack_depth;
|
||||
tracer->prev_state.recorded_value = NULL;
|
||||
uint8_t record_func_index = _PyOpcode_RecordFunctionIndices[curr_instr->op.code];
|
||||
if (record_func_index) {
|
||||
_Py_RecordFuncPtr record_func = _PyOpcode_RecordFunctions[record_func_index];
|
||||
record_func(frame, stack_pointer, oparg, &tracer->prev_state.recorded_value);
|
||||
tracer->prev_state.recorded_count = 0;
|
||||
for (int i = 0; i < MAX_RECORDED_VALUES; i++) {
|
||||
tracer->prev_state.recorded_values[i] = NULL;
|
||||
}
|
||||
const _PyOpcodeRecordEntry *record_entry = &_PyOpcode_RecordEntries[curr_instr->op.code];
|
||||
for (int i = 0; i < record_entry->count; i++) {
|
||||
_Py_RecordFuncPtr record_func = _PyOpcode_RecordFunctions[record_entry->indices[i]];
|
||||
record_func(frame, stack_pointer, oparg, &tracer->prev_state.recorded_values[i]);
|
||||
}
|
||||
tracer->prev_state.recorded_count = record_entry->count;
|
||||
assert(curr_instr->op.code == JUMP_BACKWARD_JIT || curr_instr->op.code == RESUME_CHECK_JIT || (exit != NULL));
|
||||
tracer->initial_state.jump_backward_instr = curr_instr;
|
||||
|
||||
@@ -1117,7 +1123,10 @@ _PyJit_FinalizeTracing(PyThreadState *tstate, int err)
|
||||
Py_CLEAR(tracer->initial_state.func);
|
||||
Py_CLEAR(tracer->initial_state.executor);
|
||||
Py_CLEAR(tracer->prev_state.instr_code);
|
||||
Py_CLEAR(tracer->prev_state.recorded_value);
|
||||
for (int i = 0; i < MAX_RECORDED_VALUES; i++) {
|
||||
Py_CLEAR(tracer->prev_state.recorded_values[i]);
|
||||
}
|
||||
tracer->prev_state.recorded_count = 0;
|
||||
uop_buffer_init(buffer, &tracer->uop_array[0], UOP_MAX_TRACE_LENGTH);
|
||||
tracer->is_tracing = false;
|
||||
}
|
||||
|
||||
Generated
+36
-35
@@ -99,41 +99,42 @@ void _PyOpcode_RecordFunction_CODE(_PyInterpreterFrame *frame, _PyStackRef *stac
|
||||
#define _RECORD_BOUND_METHOD_INDEX 6
|
||||
#define _RECORD_CALLABLE_KW_INDEX 7
|
||||
#define _RECORD_4OS_INDEX 8
|
||||
const uint8_t _PyOpcode_RecordFunctionIndices[256] = {
|
||||
[TO_BOOL_ALWAYS_TRUE] = _RECORD_TOS_TYPE_INDEX,
|
||||
[BINARY_OP_SUBSCR_GETITEM] = _RECORD_NOS_INDEX,
|
||||
[SEND_GEN] = _RECORD_3OS_GEN_FUNC_INDEX,
|
||||
[LOAD_SUPER_ATTR_METHOD] = _RECORD_NOS_INDEX,
|
||||
[LOAD_ATTR_INSTANCE_VALUE] = _RECORD_TOS_TYPE_INDEX,
|
||||
[LOAD_ATTR_WITH_HINT] = _RECORD_TOS_TYPE_INDEX,
|
||||
[LOAD_ATTR_SLOT] = _RECORD_TOS_TYPE_INDEX,
|
||||
[LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = _RECORD_TOS_TYPE_INDEX,
|
||||
[LOAD_ATTR_PROPERTY] = _RECORD_TOS_TYPE_INDEX,
|
||||
[STORE_ATTR_INSTANCE_VALUE] = _RECORD_TOS_TYPE_INDEX,
|
||||
[STORE_ATTR_WITH_HINT] = _RECORD_TOS_TYPE_INDEX,
|
||||
[STORE_ATTR_SLOT] = _RECORD_TOS_TYPE_INDEX,
|
||||
[FOR_ITER_GEN] = _RECORD_NOS_GEN_FUNC_INDEX,
|
||||
[LOAD_ATTR_METHOD_WITH_VALUES] = _RECORD_TOS_TYPE_INDEX,
|
||||
[LOAD_ATTR_METHOD_NO_DICT] = _RECORD_TOS_TYPE_INDEX,
|
||||
[LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = _RECORD_TOS_TYPE_INDEX,
|
||||
[LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = _RECORD_TOS_TYPE_INDEX,
|
||||
[LOAD_ATTR_METHOD_LAZY_DICT] = _RECORD_TOS_TYPE_INDEX,
|
||||
[CALL_PY_GENERAL] = _RECORD_CALLABLE_INDEX,
|
||||
[CALL_BOUND_METHOD_GENERAL] = _RECORD_BOUND_METHOD_INDEX,
|
||||
[CALL_NON_PY_GENERAL] = _RECORD_CALLABLE_INDEX,
|
||||
[CALL_BOUND_METHOD_EXACT_ARGS] = _RECORD_BOUND_METHOD_INDEX,
|
||||
[CALL_PY_EXACT_ARGS] = _RECORD_CALLABLE_INDEX,
|
||||
[CALL_ALLOC_AND_ENTER_INIT] = _RECORD_CALLABLE_INDEX,
|
||||
[CALL_BUILTIN_CLASS] = _RECORD_CALLABLE_INDEX,
|
||||
[CALL_BUILTIN_O] = _RECORD_CALLABLE_INDEX,
|
||||
[CALL_BUILTIN_FAST] = _RECORD_CALLABLE_INDEX,
|
||||
[CALL_BUILTIN_FAST_WITH_KEYWORDS] = _RECORD_CALLABLE_INDEX,
|
||||
[CALL_METHOD_DESCRIPTOR_O] = _RECORD_CALLABLE_INDEX,
|
||||
[CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = _RECORD_CALLABLE_INDEX,
|
||||
[CALL_METHOD_DESCRIPTOR_NOARGS] = _RECORD_CALLABLE_INDEX,
|
||||
[CALL_KW_PY] = _RECORD_CALLABLE_KW_INDEX,
|
||||
[CALL_KW_BOUND_METHOD] = _RECORD_CALLABLE_KW_INDEX,
|
||||
[CALL_EX_PY] = _RECORD_4OS_INDEX,
|
||||
|
||||
const _PyOpcodeRecordEntry _PyOpcode_RecordEntries[256] = {
|
||||
[TO_BOOL_ALWAYS_TRUE] = {1, {_RECORD_TOS_TYPE_INDEX}},
|
||||
[BINARY_OP_SUBSCR_GETITEM] = {1, {_RECORD_NOS_INDEX}},
|
||||
[SEND_GEN] = {1, {_RECORD_3OS_GEN_FUNC_INDEX}},
|
||||
[LOAD_SUPER_ATTR_METHOD] = {1, {_RECORD_NOS_INDEX}},
|
||||
[LOAD_ATTR_INSTANCE_VALUE] = {1, {_RECORD_TOS_TYPE_INDEX}},
|
||||
[LOAD_ATTR_WITH_HINT] = {1, {_RECORD_TOS_TYPE_INDEX}},
|
||||
[LOAD_ATTR_SLOT] = {1, {_RECORD_TOS_TYPE_INDEX}},
|
||||
[LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = {1, {_RECORD_TOS_TYPE_INDEX}},
|
||||
[LOAD_ATTR_PROPERTY] = {1, {_RECORD_TOS_TYPE_INDEX}},
|
||||
[STORE_ATTR_INSTANCE_VALUE] = {1, {_RECORD_TOS_TYPE_INDEX}},
|
||||
[STORE_ATTR_WITH_HINT] = {1, {_RECORD_TOS_TYPE_INDEX}},
|
||||
[STORE_ATTR_SLOT] = {1, {_RECORD_TOS_TYPE_INDEX}},
|
||||
[FOR_ITER_GEN] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}},
|
||||
[LOAD_ATTR_METHOD_WITH_VALUES] = {1, {_RECORD_TOS_TYPE_INDEX}},
|
||||
[LOAD_ATTR_METHOD_NO_DICT] = {1, {_RECORD_TOS_TYPE_INDEX}},
|
||||
[LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = {1, {_RECORD_TOS_TYPE_INDEX}},
|
||||
[LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = {1, {_RECORD_TOS_TYPE_INDEX}},
|
||||
[LOAD_ATTR_METHOD_LAZY_DICT] = {1, {_RECORD_TOS_TYPE_INDEX}},
|
||||
[CALL_PY_GENERAL] = {1, {_RECORD_CALLABLE_INDEX}},
|
||||
[CALL_BOUND_METHOD_GENERAL] = {1, {_RECORD_BOUND_METHOD_INDEX}},
|
||||
[CALL_NON_PY_GENERAL] = {1, {_RECORD_CALLABLE_INDEX}},
|
||||
[CALL_BOUND_METHOD_EXACT_ARGS] = {1, {_RECORD_BOUND_METHOD_INDEX}},
|
||||
[CALL_PY_EXACT_ARGS] = {1, {_RECORD_CALLABLE_INDEX}},
|
||||
[CALL_ALLOC_AND_ENTER_INIT] = {1, {_RECORD_CALLABLE_INDEX}},
|
||||
[CALL_BUILTIN_CLASS] = {1, {_RECORD_CALLABLE_INDEX}},
|
||||
[CALL_BUILTIN_O] = {1, {_RECORD_CALLABLE_INDEX}},
|
||||
[CALL_BUILTIN_FAST] = {1, {_RECORD_CALLABLE_INDEX}},
|
||||
[CALL_BUILTIN_FAST_WITH_KEYWORDS] = {1, {_RECORD_CALLABLE_INDEX}},
|
||||
[CALL_METHOD_DESCRIPTOR_O] = {1, {_RECORD_CALLABLE_INDEX}},
|
||||
[CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = {1, {_RECORD_CALLABLE_INDEX}},
|
||||
[CALL_METHOD_DESCRIPTOR_NOARGS] = {1, {_RECORD_CALLABLE_INDEX}},
|
||||
[CALL_KW_PY] = {1, {_RECORD_CALLABLE_KW_INDEX}},
|
||||
[CALL_KW_BOUND_METHOD] = {1, {_RECORD_CALLABLE_KW_INDEX}},
|
||||
[CALL_EX_PY] = {1, {_RECORD_4OS_INDEX}},
|
||||
};
|
||||
|
||||
const _Py_RecordFuncPtr _PyOpcode_RecordFunctions[9] = {
|
||||
|
||||
@@ -1132,9 +1132,7 @@ def add_macro(
|
||||
macro: parser.Macro, instructions: dict[str, Instruction], uops: dict[str, Uop]
|
||||
) -> None:
|
||||
parts: list[Part] = []
|
||||
# Track the last non-specializing uop seen, so that recording uops
|
||||
# can follow specializing ones without triggering the position check.
|
||||
prev_uop: Uop | None = None
|
||||
seen_real_uop = False
|
||||
for part in macro.uops:
|
||||
match part:
|
||||
case parser.OpName():
|
||||
@@ -1146,14 +1144,15 @@ def add_macro(
|
||||
f"No Uop named {part.name}", macro.tokens[0]
|
||||
)
|
||||
uop = uops[part.name]
|
||||
if uop.properties.records_value and prev_uop is not None:
|
||||
raise analysis_error(
|
||||
f"Recording uop {part.name} is not allowed "
|
||||
f"after non-specializing uops in macro",
|
||||
macro.tokens[0])
|
||||
if uop.properties.records_value:
|
||||
if seen_real_uop:
|
||||
raise analysis_error(
|
||||
f"Recording uop {part.name} must precede all "
|
||||
f"non-recording, non-specializing uops in macro",
|
||||
macro.tokens[0])
|
||||
elif "specializing" not in uop.annotations:
|
||||
seen_real_uop = True
|
||||
parts.append(uop)
|
||||
if "specializing" not in uop.annotations:
|
||||
prev_uop = uop
|
||||
case parser.CacheEffect():
|
||||
parts.append(Skip(part.size))
|
||||
case _:
|
||||
|
||||
@@ -25,6 +25,9 @@ from stack import Stack, Storage
|
||||
|
||||
DEFAULT_OUTPUT = ROOT / "Python/recorder_functions.c.h"
|
||||
|
||||
# Must match MAX_RECORDED_VALUES in Include/internal/pycore_optimizer.h.
|
||||
MAX_RECORDED_VALUES = 3
|
||||
|
||||
|
||||
class RecorderEmitter(Emitter):
|
||||
def __init__(self, out: CWriter):
|
||||
@@ -81,27 +84,35 @@ def generate_recorder_functions(filenames: list[str], analysis: Analysis, out: C
|
||||
|
||||
def generate_recorder_tables(analysis: Analysis, out: CWriter) -> None:
|
||||
record_function_indexes: dict[str, int] = dict()
|
||||
record_table: dict[str, str] = {}
|
||||
record_table: dict[str, list[str]] = {}
|
||||
index = 1
|
||||
for inst in analysis.instructions.values():
|
||||
if not inst.properties.records_value:
|
||||
continue
|
||||
records: list[str] = []
|
||||
for part in inst.parts:
|
||||
if not part.properties.records_value:
|
||||
continue
|
||||
if part.name not in record_function_indexes:
|
||||
record_function_indexes[part.name] = index
|
||||
index += 1
|
||||
record_table[inst.name] = part.name
|
||||
break
|
||||
records.append(part.name)
|
||||
if records:
|
||||
if len(records) > MAX_RECORDED_VALUES:
|
||||
raise ValueError(
|
||||
f"Instruction {inst.name} has {len(records)} recording ops, "
|
||||
f"exceeds MAX_RECORDED_VALUES ({MAX_RECORDED_VALUES})"
|
||||
)
|
||||
record_table[inst.name] = records
|
||||
func_count = len(record_function_indexes)
|
||||
|
||||
for name, index in record_function_indexes.items():
|
||||
out.emit(f"#define {name}_INDEX {index}\n")
|
||||
args = "_PyJitTracerState *tracer, _PyInterpreterFrame *frame, _PyStackRef *stackpointer, int oparg"
|
||||
out.emit("const uint8_t _PyOpcode_RecordFunctionIndices[256] = {\n")
|
||||
for inst_name, record_name in record_table.items():
|
||||
out.emit(f" [{inst_name}] = {record_name}_INDEX,\n")
|
||||
out.emit("\n")
|
||||
out.emit("const _PyOpcodeRecordEntry _PyOpcode_RecordEntries[256] = {\n")
|
||||
for inst_name, record_names in record_table.items():
|
||||
indices = ", ".join(f"{name}_INDEX" for name in record_names)
|
||||
out.emit(f" [{inst_name}] = {{{len(record_names)}, {{{indices}}}}},\n")
|
||||
out.emit("};\n\n")
|
||||
out.emit(f"const _Py_RecordFuncPtr _PyOpcode_RecordFunctions[{func_count+1}] = {{\n")
|
||||
out.emit(" [0] = NULL,\n")
|
||||
|
||||
Reference in New Issue
Block a user