diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 3b251d3814b1da..9bfb4f4f3a4dea 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -169,7 +169,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CHECK_FUNCTION_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, [_CHECK_STACK_SPACE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, [_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_PUSH_FRAME] = 0, + [_PUSH_FRAME] = HAS_ESCAPES_FLAG, [_CALL_TYPE_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_CALL_STR_1] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_TUPLE_1] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6df99d6465347f..c48f0a17c60fb1 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -805,7 +805,8 @@ dummy_func( #if TIER_ONE assert(frame != &entry_frame); #endif - STORE_SP(); + SYNC_SP(); + _PyFrame_SetStackPointer(frame, stack_pointer); assert(EMPTY()); _Py_LeaveRecursiveCallPy(tstate); // GH-99729: We need to unlink the frame *before* clearing it: @@ -3154,7 +3155,8 @@ dummy_func( // Write it out explicitly because it's subtly different. // Eventually this should be the only occurrence of this code. assert(tstate->interp->eval_frame == NULL); - STORE_SP(); + SYNC_SP(); + _PyFrame_SetStackPointer(frame, stack_pointer); new_frame->previous = frame; CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = new_frame; @@ -4013,20 +4015,27 @@ dummy_func( ///////// Tier-2 only opcodes ///////// op (_GUARD_IS_TRUE_POP, (flag -- )) { - DEOPT_IF(Py_IsFalse(flag)); + SYNC_SP(); + DEOPT_IF(!Py_IsTrue(flag)); assert(Py_IsTrue(flag)); } op (_GUARD_IS_FALSE_POP, (flag -- )) { - DEOPT_IF(Py_IsTrue(flag)); + SYNC_SP(); + DEOPT_IF(!Py_IsFalse(flag)); assert(Py_IsFalse(flag)); } op (_GUARD_IS_NONE_POP, (val -- )) { - DEOPT_IF(!Py_IsNone(val)); + SYNC_SP(); + if (!Py_IsNone(val)) { + Py_DECREF(val); + DEOPT_IF(1); + } } op (_GUARD_IS_NOT_NONE_POP, (val -- )) { + SYNC_SP(); DEOPT_IF(Py_IsNone(val)); Py_DECREF(val); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 6060bebca9afa5..2b4399b25bae2b 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3318,35 +3318,38 @@ case _GUARD_IS_TRUE_POP: { PyObject *flag; flag = stack_pointer[-1]; - if (Py_IsFalse(flag)) goto deoptimize; - assert(Py_IsTrue(flag)); stack_pointer += -1; + if (!Py_IsTrue(flag)) goto deoptimize; + assert(Py_IsTrue(flag)); break; } case _GUARD_IS_FALSE_POP: { PyObject *flag; flag = stack_pointer[-1]; - if (Py_IsTrue(flag)) goto deoptimize; - assert(Py_IsFalse(flag)); stack_pointer += -1; + if (!Py_IsFalse(flag)) goto deoptimize; + assert(Py_IsFalse(flag)); break; } case _GUARD_IS_NONE_POP: { PyObject *val; val = stack_pointer[-1]; - if (!Py_IsNone(val)) goto deoptimize; stack_pointer += -1; + if (!Py_IsNone(val)) { + Py_DECREF(val); + if (1) goto deoptimize; + } break; } case _GUARD_IS_NOT_NONE_POP: { PyObject *val; val = stack_pointer[-1]; + stack_pointer += -1; if (Py_IsNone(val)) goto deoptimize; Py_DECREF(val); - stack_pointer += -1; break; } diff --git a/Python/optimizer.c b/Python/optimizer.c index 236ae266971d48..1551a5ef61f892 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -481,18 +481,19 @@ translate_bytecode_to_trace( goto done; } uint32_t uopcode = BRANCH_TO_GUARD[opcode - POP_JUMP_IF_FALSE][jump_likely]; - _Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]; DPRINTF(2, "%s(%d): counter=%x, bitcount=%d, likely=%d, confidence=%d, uopcode=%s\n", _PyOpcode_OpName[opcode], oparg, counter, bitcount, jump_likely, confidence, _PyUOpName(uopcode)); - ADD_TO_TRACE(uopcode, max_length, 0, target); + _Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]; + _Py_CODEUNIT *target_instr = next_instr + oparg; if (jump_likely) { - _Py_CODEUNIT *target_instr = next_instr + oparg; DPRINTF(2, "Jump likely (%x = %d bits), continue at byte offset %d\n", instr[1].cache, bitcount, 2 * INSTR_IP(target_instr, code)); instr = target_instr; + ADD_TO_TRACE(uopcode, max_length, 0, INSTR_IP(next_instr, code)); goto top; } + ADD_TO_TRACE(uopcode, max_length, 0, INSTR_IP(target_instr, code)); break; } diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 7ed3b57136554f..b80fa66e2a159a 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -464,7 +464,7 @@ def compute_properties(op: parser.InstDef) -> Properties: ends_with_eval_breaker=eval_breaker_at_end(op), needs_this=variable_used(op, "this_instr"), always_exits=always_exits(op), - stores_sp=variable_used(op, "STORE_SP"), + stores_sp=variable_used(op, "SYNC_SP"), tier_one_only=variable_used(op, "TIER_ONE_ONLY"), uses_co_consts=variable_used(op, "FRAME_CO_CONSTS"), uses_co_names=variable_used(op, "FRAME_CO_NAMES"), diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index c6c602c7122b41..2fc2ab115321cf 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -124,7 +124,7 @@ def replace_decrefs( out.emit(f"Py_DECREF({var.name});\n") -def replace_store_sp( +def replace_sync_sp( out: CWriter, tkn: Token, tkn_iter: Iterator[Token], @@ -135,9 +135,7 @@ def replace_store_sp( next(tkn_iter) next(tkn_iter) next(tkn_iter) - out.emit_at("", tkn) stack.flush(out) - out.emit("_PyFrame_SetStackPointer(frame, stack_pointer);\n") def replace_check_eval_breaker( @@ -160,7 +158,7 @@ def replace_check_eval_breaker( "ERROR_IF": replace_error, "DECREF_INPUTS": replace_decrefs, "CHECK_EVAL_BREAKER": replace_check_eval_breaker, - "STORE_SP": replace_store_sp, + "SYNC_SP": replace_sync_sp, } ReplacementFunctionType = Callable[ diff --git a/Tools/cases_generator/interpreter_definition.md b/Tools/cases_generator/interpreter_definition.md index e5a48999e962cb..e87aff43762b11 100644 --- a/Tools/cases_generator/interpreter_definition.md +++ b/Tools/cases_generator/interpreter_definition.md @@ -6,7 +6,7 @@ The CPython interpreter is defined in C, meaning that the semantics of the bytecode instructions, the dispatching mechanism, error handling, and tracing and instrumentation are all intermixed. -This document proposes defining a custom C-like DSL for defining the +This document proposes defining a custom C-like DSL for defining the instruction semantics and tools for generating the code deriving from the instruction definitions. @@ -46,7 +46,7 @@ passes from the semantic definition, reducing errors. As we improve the performance of CPython, we need to optimize larger regions of code, use more complex optimizations and, ultimately, translate to machine -code. +code. All of these steps introduce the possibility of more bugs, and require more code to be written. One way to mitigate this is through the use of code generators. @@ -62,7 +62,7 @@ blocks as the instructions for the tier 1 (PEP 659) interpreter. Rewriting all the instructions is tedious and error-prone, and changing the instructions is a maintenance headache as both versions need to be kept in sync. -By using a code generator and using a common source for the instructions, or +By using a code generator and using a common source for the instructions, or parts of instructions, we can reduce the potential for errors considerably. @@ -75,7 +75,7 @@ We update it as the need arises. Each op definition has a kind, a name, a stack and instruction stream effect, and a piece of C code describing its semantics:: - + ``` file: (definition | family | pseudo)+ @@ -86,7 +86,7 @@ and a piece of C code describing its semantics:: "op" "(" NAME "," stack_effect ")" "{" C-code "}" | "macro" "(" NAME ")" "=" uop ("+" uop)* ";" - + stack_effect: "(" [inputs] "--" [outputs] ")" @@ -201,6 +201,7 @@ Those functions include: * `DEOPT_IF(cond, instruction)`. Deoptimize if `cond` is met. * `ERROR_IF(cond, label)`. Jump to error handler at `label` if `cond` is true. * `DECREF_INPUTS()`. Generate `Py_DECREF()` calls for the input stack effects. +* `SYNC_SP()`. Synchronizes the physical stack pointer with the stack effects. Note that the use of `DECREF_INPUTS()` is optional -- manual calls to `Py_DECREF()` or other approaches are also acceptable @@ -445,7 +446,7 @@ rather than popping and pushing, such that `LOAD_ATTR_SLOT` would look something stack_pointer += 1; } s1 = res; - } + } next_instr += (1 + 1 + 2 + 1 + 4); stack_pointer[-1] = s1; DISPATCH(); diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py index 6633950aada002..f62ece43c1be7f 100644 --- a/Tools/cases_generator/stack.py +++ b/Tools/cases_generator/stack.py @@ -169,6 +169,7 @@ def push(self, var: StackItem) -> str: return "" def flush(self, out: CWriter) -> None: + out.start_line() for var in self.variables: if not var.peek: cast = "(PyObject *)" if var.type else "" @@ -189,6 +190,7 @@ def flush(self, out: CWriter) -> None: self.base_offset.clear() self.top_offset.clear() self.peek_offset.clear() + out.start_line() def as_comment(self) -> str: return f"/* Variables: {[v.name for v in self.variables]}. Base offset: {self.base_offset.to_c()}. Top offset: {self.top_offset.to_c()} */"