Skip to content

Commit

Permalink
bpo-46841: Use inline caching for COMPARE_OP (GH-31622)
Browse files Browse the repository at this point in the history
  • Loading branch information
brandtbucher authored Mar 1, 2022
1 parent df9f759 commit 7820a58
Show file tree
Hide file tree
Showing 9 changed files with 179 additions and 163 deletions.
16 changes: 12 additions & 4 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,20 @@ typedef struct {
} _PyBinaryOpCache;

#define INLINE_CACHE_ENTRIES_BINARY_OP CACHE_ENTRIES(_PyBinaryOpCache)

/* Inline cache layout for UNPACK_SEQUENCE: a single code unit. */
typedef struct {
/* NOTE(review): presumably the adaptive countdown, as in the other inline
 * caches; its readers/writers are not visible in this view -- confirm. */
_Py_CODEUNIT counter;
} _PyUnpackSequenceCache;


#define INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE \
(sizeof(_PyUnpackSequenceCache) / sizeof(_Py_CODEUNIT))
CACHE_ENTRIES(_PyUnpackSequenceCache)

/* Inline cache layout for COMPARE_OP. The cache lives in the code units
 * directly after the instruction (the specializer addresses it as
 * `instr + 1`, the interpreter as `next_instr`). */
typedef struct {
/* Countdown consulted by COMPARE_OP_ADAPTIVE: when it is zero the
 * instruction calls _Py_Specialize_CompareOp(); otherwise it is
 * decremented and the generic COMPARE_OP runs. The specializer resets it
 * on both success and failure. */
_Py_CODEUNIT counter;
/* Value stored by the specializer for the specialized forms: the
 * "when to jump" mask for COMPARE_OP_FLOAT_JUMP / COMPARE_OP_INT_JUMP,
 * or a 0/1 "invert" flag for COMPARE_OP_STR_JUMP. */
_Py_CODEUNIT mask;
} _PyCompareOpCache;

/* Size of _PyCompareOpCache as used with JUMPBY() to step over the cache. */
#define INLINE_CACHE_ENTRIES_COMPARE_OP CACHE_ENTRIES(_PyCompareOpCache)

/* Maximum size of code to quicken, in code units. */
#define MAX_SIZE_TO_QUICKEN 5000
Expand Down Expand Up @@ -323,8 +330,9 @@ extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int narg
extern int _Py_Specialize_Precall(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
PyObject *kwnames, SpecializedCacheEntry *cache, PyObject *builtins);
extern void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
int oparg);
extern void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache);
int oparg);
extern void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs,
_Py_CODEUNIT *instr, int oparg);
extern void _Py_Specialize_UnpackSequence(PyObject *seq, _Py_CODEUNIT *instr,
int oparg);

Expand Down
1 change: 1 addition & 0 deletions Include/opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Lib/importlib/_bootstrap_external.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,7 @@ def _write_atomic(path, data, mode=0o666):
# Python 3.11a5 3480 (New CALL opcodes, second iteration)
# Python 3.11a5 3481 (Use inline cache for BINARY_OP)
# Python 3.11a5 3482 (Use inline caching for UNPACK_SEQUENCE and LOAD_GLOBAL)
# Python 3.11a5 3483 (Use inline caching for COMPARE_OP)

# Python 3.12 will start with magic number 3500

Expand All @@ -403,7 +404,7 @@ def _write_atomic(path, data, mode=0o666):
# Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
# in PC/launcher.c must also be updated.

MAGIC_NUMBER = (3482).to_bytes(2, 'little') + b'\r\n'
MAGIC_NUMBER = (3483).to_bytes(2, 'little') + b'\r\n'
_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c

_PYCACHE = '__pycache__'
Expand Down
2 changes: 1 addition & 1 deletion Lib/opcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def jabs_op(name, op, entries=0):
def_op('BUILD_SET', 104) # Number of set items
def_op('BUILD_MAP', 105) # Number of dict entries
name_op('LOAD_ATTR', 106) # Index in name list
def_op('COMPARE_OP', 107) # Comparison operator
def_op('COMPARE_OP', 107, 2) # Comparison operator
hascompare.append(107)
name_op('IMPORT_NAME', 108) # Index in name list
name_op('IMPORT_FROM', 109) # Index in name list
Expand Down
4 changes: 3 additions & 1 deletion Lib/test/test_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -1002,7 +1002,9 @@ def if_else_break():
'JUMP_FORWARD',
)

for line, instr in enumerate(dis.Bytecode(if_else_break)):
for line, instr in enumerate(
dis.Bytecode(if_else_break, show_caches=True)
):
if instr.opname == 'JUMP_FORWARD':
self.assertNotEqual(instr.arg, 0)
elif instr.opname in HANDLED_JUMPS:
Expand Down
246 changes: 123 additions & 123 deletions Lib/test/test_dis.py

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Use inline caching for :opcode:`COMPARE_OP`.
33 changes: 18 additions & 15 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -3671,36 +3671,37 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
SET_TOP(res);
Py_DECREF(left);
Py_DECREF(right);
if (res == NULL)
if (res == NULL) {
goto error;
}
JUMPBY(INLINE_CACHE_ENTRIES_COMPARE_OP);
PREDICT(POP_JUMP_IF_FALSE);
PREDICT(POP_JUMP_IF_TRUE);
DISPATCH();
}

TARGET(COMPARE_OP_ADAPTIVE) {
assert(cframe.use_tracing == 0);
SpecializedCacheEntry *cache = GET_CACHE();
if (cache->adaptive.counter == 0) {
_PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr;
if (cache->counter == 0) {
PyObject *right = TOP();
PyObject *left = SECOND();
next_instr--;
_Py_Specialize_CompareOp(left, right, next_instr, cache);
_Py_Specialize_CompareOp(left, right, next_instr, oparg);
DISPATCH();
}
else {
STAT_INC(COMPARE_OP, deferred);
cache->adaptive.counter--;
oparg = cache->adaptive.original_oparg;
cache->counter--;
JUMP_TO_INSTRUCTION(COMPARE_OP);
}
}

TARGET(COMPARE_OP_FLOAT_JUMP) {
assert(cframe.use_tracing == 0);
// Combined: COMPARE_OP (float ? float) + POP_JUMP_IF_(true/false)
SpecializedCacheEntry *caches = GET_CACHE();
int when_to_jump_mask = caches[0].adaptive.index;
_PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr;
int when_to_jump_mask = cache->mask;
PyObject *right = TOP();
PyObject *left = SECOND();
DEOPT_IF(!PyFloat_CheckExact(left), COMPARE_OP);
Expand All @@ -3711,6 +3712,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
DEOPT_IF(isnan(dleft), COMPARE_OP);
DEOPT_IF(isnan(dright), COMPARE_OP);
STAT_INC(COMPARE_OP, hit);
JUMPBY(INLINE_CACHE_ENTRIES_COMPARE_OP);
NEXTOPARG();
STACK_SHRINK(2);
Py_DECREF(left);
Expand All @@ -3731,8 +3733,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(COMPARE_OP_INT_JUMP) {
assert(cframe.use_tracing == 0);
// Combined: COMPARE_OP (int ? int) + POP_JUMP_IF_(true/false)
SpecializedCacheEntry *caches = GET_CACHE();
int when_to_jump_mask = caches[0].adaptive.index;
_PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr;
int when_to_jump_mask = cache->mask;
PyObject *right = TOP();
PyObject *left = SECOND();
DEOPT_IF(!PyLong_CheckExact(left), COMPARE_OP);
Expand All @@ -3744,6 +3746,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
Py_ssize_t ileft = Py_SIZE(left) * ((PyLongObject *)left)->ob_digit[0];
Py_ssize_t iright = Py_SIZE(right) * ((PyLongObject *)right)->ob_digit[0];
int sign = (ileft > iright) - (ileft < iright);
JUMPBY(INLINE_CACHE_ENTRIES_COMPARE_OP);
NEXTOPARG();
STACK_SHRINK(2);
Py_DECREF(left);
Expand All @@ -3764,8 +3767,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(COMPARE_OP_STR_JUMP) {
assert(cframe.use_tracing == 0);
// Combined: COMPARE_OP (str == str or str != str) + POP_JUMP_IF_(true/false)
SpecializedCacheEntry *caches = GET_CACHE();
int invert = caches[0].adaptive.index;
_PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr;
int invert = cache->mask;
PyObject *right = TOP();
PyObject *left = SECOND();
DEOPT_IF(!PyUnicode_CheckExact(left), COMPARE_OP);
Expand All @@ -3775,8 +3778,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
if (res < 0) {
goto error;
}
assert(caches[0].adaptive.original_oparg == Py_EQ ||
caches[0].adaptive.original_oparg == Py_NE);
assert(oparg == Py_EQ || oparg == Py_NE);
JUMPBY(INLINE_CACHE_ENTRIES_COMPARE_OP);
NEXTOPARG();
assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE);
STACK_SHRINK(2);
Expand Down Expand Up @@ -5601,7 +5604,7 @@ MISS_WITH_CACHE(LOAD_METHOD)
MISS_WITH_CACHE(PRECALL)
MISS_WITH_CACHE(CALL)
MISS_WITH_INLINE_CACHE(BINARY_OP)
MISS_WITH_CACHE(COMPARE_OP)
MISS_WITH_INLINE_CACHE(COMPARE_OP)
MISS_WITH_CACHE(BINARY_SUBSCR)
MISS_WITH_INLINE_CACHE(UNPACK_SEQUENCE)
MISS_WITH_OPARG_COUNTER(STORE_SUBSCR)
Expand Down
36 changes: 18 additions & 18 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ static uint8_t cache_requirements[256] = {
[CALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
[PRECALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
[STORE_ATTR] = 1, // _PyAdaptiveEntry
[COMPARE_OP] = 1, /* _PyAdaptiveEntry */
};

Py_ssize_t _Py_QuickenedCount = 0;
Expand Down Expand Up @@ -2057,26 +2056,27 @@ static int compare_masks[] = {
};

void
_Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs,
_Py_CODEUNIT *instr, SpecializedCacheEntry *cache)
_Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
int oparg)
{
_PyAdaptiveEntry *adaptive = &cache->adaptive;
int op = adaptive->original_oparg;
int next_opcode = _Py_OPCODE(instr[1]);
assert(_PyOpcode_InlineCacheEntries[COMPARE_OP] ==
INLINE_CACHE_ENTRIES_COMPARE_OP);
_PyCompareOpCache *cache = (_PyCompareOpCache *)(instr + 1);
int next_opcode = _Py_OPCODE(instr[INLINE_CACHE_ENTRIES_COMPARE_OP + 1]);
if (next_opcode != POP_JUMP_IF_FALSE && next_opcode != POP_JUMP_IF_TRUE) {
// Can't ever combine, so don't bother being adaptive (unless
// we're collecting stats, where it's more important to get accurate hit
// counts for the unadaptive version and each of the different failure
// types):
#ifndef Py_STATS
*instr = _Py_MAKECODEUNIT(COMPARE_OP, adaptive->original_oparg);
*instr = _Py_MAKECODEUNIT(COMPARE_OP, oparg);
return;
#endif
SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_COMPARE_OP_NOT_FOLLOWED_BY_COND_JUMP);
goto failure;
}
assert(op <= Py_GE);
int when_to_jump_mask = compare_masks[op];
assert(oparg <= Py_GE);
int when_to_jump_mask = compare_masks[oparg];
if (next_opcode == POP_JUMP_IF_FALSE) {
when_to_jump_mask = (1 | 2 | 4) & ~when_to_jump_mask;
}
Expand All @@ -2085,14 +2085,14 @@ _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs,
goto failure;
}
if (PyFloat_CheckExact(lhs)) {
*instr = _Py_MAKECODEUNIT(COMPARE_OP_FLOAT_JUMP, _Py_OPARG(*instr));
adaptive->index = when_to_jump_mask;
*instr = _Py_MAKECODEUNIT(COMPARE_OP_FLOAT_JUMP, oparg);
cache->mask = when_to_jump_mask;
goto success;
}
if (PyLong_CheckExact(lhs)) {
if (Py_ABS(Py_SIZE(lhs)) <= 1 && Py_ABS(Py_SIZE(rhs)) <= 1) {
*instr = _Py_MAKECODEUNIT(COMPARE_OP_INT_JUMP, _Py_OPARG(*instr));
adaptive->index = when_to_jump_mask;
*instr = _Py_MAKECODEUNIT(COMPARE_OP_INT_JUMP, oparg);
cache->mask = when_to_jump_mask;
goto success;
}
else {
Expand All @@ -2101,24 +2101,24 @@ _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs,
}
}
if (PyUnicode_CheckExact(lhs)) {
if (op != Py_EQ && op != Py_NE) {
if (oparg != Py_EQ && oparg != Py_NE) {
SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_COMPARE_OP_STRING);
goto failure;
}
else {
*instr = _Py_MAKECODEUNIT(COMPARE_OP_STR_JUMP, _Py_OPARG(*instr));
adaptive->index = (when_to_jump_mask & 2) == 0;
*instr = _Py_MAKECODEUNIT(COMPARE_OP_STR_JUMP, oparg);
cache->mask = (when_to_jump_mask & 2) == 0;
goto success;
}
}
SPECIALIZATION_FAIL(COMPARE_OP, compare_op_fail_kind(lhs, rhs));
failure:
STAT_INC(COMPARE_OP, failure);
cache_backoff(adaptive);
cache->counter = ADAPTIVE_CACHE_BACKOFF;
return;
success:
STAT_INC(COMPARE_OP, success);
adaptive->counter = initial_counter_value();
cache->counter = initial_counter_value();
}

#ifdef Py_STATS
Expand Down

0 comments on commit 7820a58

Please sign in to comment.