Commit

[3.11] GH-93354: Use exponential backoff to avoid excessive specialization attempts (GH-93355) (GH-93379)

Co-authored-by: Mark Shannon <[email protected]>
Co-authored-by: Łukasz Langa <[email protected]>
markshannon and ambv authored Jun 30, 2022
1 parent 6c40538 commit 113b309
Showing 4 changed files with 95 additions and 48 deletions.
47 changes: 44 additions & 3 deletions Include/internal/pycore_code.h
@@ -233,9 +233,6 @@ extern void _PyLineTable_InitAddressRange(
extern int _PyLineTable_NextAddressRange(PyCodeAddressRange *range);
extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range);


#define ADAPTIVE_CACHE_BACKOFF 64

/* Specialization functions */

extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr,
@@ -475,6 +472,50 @@ write_location_entry_start(uint8_t *ptr, int code, int length)
}


/** Counters
* The first 16-bit value in each inline cache is a counter.
* When counting misses, the counter is treated as a simple unsigned value.
*
* When counting executions until the next specialization attempt,
* exponential backoff is used to reduce the number of specialization failures.
* The high 12 bits store the counter, the low 4 bits store the backoff exponent.
* On a specialization failure, the backoff exponent is incremented and the
* counter set to (2**backoff - 1).
* Backoff == 6 -> starting counter == 63, backoff == 10 -> starting counter == 1023.
*/

/* With a 16-bit counter, we have 12 bits for the counter value, and 4 bits for the backoff */
#define ADAPTIVE_BACKOFF_BITS 4
/* The initial counter value is 31 == 2**ADAPTIVE_BACKOFF_START - 1 */
#define ADAPTIVE_BACKOFF_START 5

#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)


static inline uint16_t
adaptive_counter_bits(int value, int backoff) {
    return (value << ADAPTIVE_BACKOFF_BITS) |
           (backoff & ((1<<ADAPTIVE_BACKOFF_BITS)-1));
}

static inline uint16_t
adaptive_counter_start(void) {
    unsigned int value = (1 << ADAPTIVE_BACKOFF_START) - 1;
    return adaptive_counter_bits(value, ADAPTIVE_BACKOFF_START);
}

static inline uint16_t
adaptive_counter_backoff(uint16_t counter) {
    unsigned int backoff = counter & ((1<<ADAPTIVE_BACKOFF_BITS)-1);
    backoff++;
    if (backoff > MAX_BACKOFF_VALUE) {
        backoff = MAX_BACKOFF_VALUE;
    }
    unsigned int value = (1 << backoff) - 1;
    return adaptive_counter_bits(value, backoff);
}


/* Line array cache for tracing */

extern int _PyCode_CreateLineArray(PyCodeObject *co);
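
To make the backoff schedule concrete, the new helpers can be exercised outside the interpreter. The sketch below copies the definitions added above verbatim; the main() loop and its printed wording are illustrative additions, not part of the commit. Starting from adaptive_counter_start(), each failed specialization attempt roughly doubles the wait: 31, 63, 127, ... executions, capped at 4095 once the backoff exponent reaches MAX_BACKOFF_VALUE.

#include <stdint.h>
#include <stdio.h>

/* Copied from the diff above (Include/internal/pycore_code.h). */
#define ADAPTIVE_BACKOFF_BITS 4
#define ADAPTIVE_BACKOFF_START 5
#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)

static inline uint16_t
adaptive_counter_bits(int value, int backoff) {
    return (value << ADAPTIVE_BACKOFF_BITS) |
           (backoff & ((1<<ADAPTIVE_BACKOFF_BITS)-1));
}

static inline uint16_t
adaptive_counter_start(void) {
    unsigned int value = (1 << ADAPTIVE_BACKOFF_START) - 1;
    return adaptive_counter_bits(value, ADAPTIVE_BACKOFF_START);
}

static inline uint16_t
adaptive_counter_backoff(uint16_t counter) {
    unsigned int backoff = counter & ((1<<ADAPTIVE_BACKOFF_BITS)-1);
    backoff++;
    if (backoff > MAX_BACKOFF_VALUE) {
        backoff = MAX_BACKOFF_VALUE;
    }
    unsigned int value = (1 << backoff) - 1;
    return adaptive_counter_bits(value, backoff);
}

int main(void)
{
    uint16_t counter = adaptive_counter_start();
    for (int failures = 0; failures < 10; failures++) {
        /* High 12 bits: executions to wait; low 4 bits: backoff exponent. */
        printf("after %d failed attempts: wait %u, backoff %u\n",
               failures,
               counter >> ADAPTIVE_BACKOFF_BITS,
               counter & ((1 << ADAPTIVE_BACKOFF_BITS) - 1));
        counter = adaptive_counter_backoff(counter);
    }
    return 0;
}
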
@@ -0,0 +1,3 @@
Use exponential backoff for specialization counters in the interpreter. Can
reduce the number of failed specializations significantly and avoid slowdown
for those parts of a program that are not suitable for specialization.
46 changes: 25 additions & 21 deletions Python/ceval.c
@@ -1559,7 +1559,11 @@ eval_frame_handle_pending(PyThreadState *tstate)
dtrace_function_entry(frame); \
}

#define ADAPTIVE_COUNTER_IS_ZERO(cache) \
    (cache)->counter < (1<<ADAPTIVE_BACKOFF_BITS)

#define DECREMENT_ADAPTIVE_COUNTER(cache) \
    (cache)->counter -= (1<<ADAPTIVE_BACKOFF_BITS)
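/* The counter occupies the high 12 bits, so subtracting
   (1<<ADAPTIVE_BACKOFF_BITS) decrements it by one without touching the
   backoff exponent in the low 4 bits, and the counter is zero exactly
   when the whole 16-bit value is below (1<<ADAPTIVE_BACKOFF_BITS). */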

static int
trace_function_entry(PyThreadState *tstate, _PyInterpreterFrame *frame)
@@ -2154,7 +2158,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int

TARGET(BINARY_SUBSCR_ADAPTIVE) {
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
if (cache->counter == 0) {
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *sub = TOP();
PyObject *container = SECOND();
next_instr--;
@@ -2165,7 +2169,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(BINARY_SUBSCR, deferred);
cache->counter--;
DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(BINARY_SUBSCR);
}
}
@@ -2319,7 +2323,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int

TARGET(STORE_SUBSCR_ADAPTIVE) {
_PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr;
if (cache->counter == 0) {
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *sub = TOP();
PyObject *container = SECOND();
next_instr--;
@@ -2330,7 +2334,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(STORE_SUBSCR, deferred);
cache->counter--;
DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(STORE_SUBSCR);
}
}
@@ -2812,15 +2816,15 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(UNPACK_SEQUENCE_ADAPTIVE) {
assert(cframe.use_tracing == 0);
_PyUnpackSequenceCache *cache = (_PyUnpackSequenceCache *)next_instr;
if (cache->counter == 0) {
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *seq = TOP();
next_instr--;
_Py_Specialize_UnpackSequence(seq, next_instr, oparg);
NOTRACE_DISPATCH_SAME_OPARG();
}
else {
STAT_INC(UNPACK_SEQUENCE, deferred);
cache->counter--;
DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(UNPACK_SEQUENCE);
}
}
@@ -3053,7 +3057,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(LOAD_GLOBAL_ADAPTIVE) {
assert(cframe.use_tracing == 0);
_PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)next_instr;
if (cache->counter == 0) {
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *name = GETITEM(names, oparg>>1);
next_instr--;
if (_Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name) < 0) {
@@ -3063,7 +3067,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(LOAD_GLOBAL, deferred);
cache->counter--;
DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(LOAD_GLOBAL);
}
}
@@ -3477,7 +3481,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(LOAD_ATTR_ADAPTIVE) {
assert(cframe.use_tracing == 0);
_PyAttrCache *cache = (_PyAttrCache *)next_instr;
if (cache->counter == 0) {
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *owner = TOP();
PyObject *name = GETITEM(names, oparg);
next_instr--;
@@ -3488,7 +3492,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(LOAD_ATTR, deferred);
cache->counter--;
DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(LOAD_ATTR);
}
}
@@ -3586,7 +3590,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(STORE_ATTR_ADAPTIVE) {
assert(cframe.use_tracing == 0);
_PyAttrCache *cache = (_PyAttrCache *)next_instr;
if (cache->counter == 0) {
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *owner = TOP();
PyObject *name = GETITEM(names, oparg);
next_instr--;
@@ -3597,7 +3601,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(STORE_ATTR, deferred);
cache->counter--;
DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(STORE_ATTR);
}
}
@@ -3716,7 +3720,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(COMPARE_OP_ADAPTIVE) {
assert(cframe.use_tracing == 0);
_PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr;
if (cache->counter == 0) {
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *right = TOP();
PyObject *left = SECOND();
next_instr--;
@@ -3725,7 +3729,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(COMPARE_OP, deferred);
cache->counter--;
DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(COMPARE_OP);
}
}
@@ -4523,7 +4527,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(LOAD_METHOD_ADAPTIVE) {
assert(cframe.use_tracing == 0);
_PyLoadMethodCache *cache = (_PyLoadMethodCache *)next_instr;
if (cache->counter == 0) {
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *owner = TOP();
PyObject *name = GETITEM(names, oparg);
next_instr--;
@@ -4534,7 +4538,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(LOAD_METHOD, deferred);
cache->counter--;
DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(LOAD_METHOD);
}
}
@@ -4815,7 +4819,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int

TARGET(CALL_ADAPTIVE) {
_PyCallCache *cache = (_PyCallCache *)next_instr;
if (cache->counter == 0) {
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
next_instr--;
int is_meth = is_method(stack_pointer, oparg);
int nargs = oparg + is_meth;
@@ -4829,7 +4833,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(CALL, deferred);
cache->counter--;
DECREMENT_ADAPTIVE_COUNTER(cache);
goto call_function;
}
}
@@ -5560,7 +5564,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(BINARY_OP_ADAPTIVE) {
assert(cframe.use_tracing == 0);
_PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr;
if (cache->counter == 0) {
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *lhs = SECOND();
PyObject *rhs = TOP();
next_instr--;
@@ -5569,7 +5573,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(BINARY_OP, deferred);
cache->counter--;
DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(BINARY_OP);
}
}
@@ -5690,7 +5694,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
assert(adaptive_opcode);
_Py_SET_OPCODE(next_instr[-1], adaptive_opcode);
STAT_INC(opcode, deopt);
*counter = ADAPTIVE_CACHE_BACKOFF;
*counter = adaptive_counter_start();
}
next_instr--;
DISPATCH_GOTO();
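
None of the hunks above call adaptive_counter_backoff() directly; per the Counters comment in pycore_code.h, it is applied when a specialization attempt fails, which happens in the specialization functions of the fourth changed file (its diff did not render on this page). Below is a minimal sketch of that failure path, not a copy of the commit: the function name, the succeeded flag, and the surrounding control flow are illustrative placeholders.

/* Illustrative only: how a specialization routine is expected to use the new
 * helper on failure.  Names below are placeholders, not the real CPython code. */
static void
note_specialization_result(_PyBinarySubscrCache *cache, int succeeded)
{
    if (!succeeded) {
        /* Failed attempt: increment the backoff exponent and make the
         * adaptive instruction wait (2**backoff - 1) more executions
         * before the next attempt. */
        cache->counter = adaptive_counter_backoff(cache->counter);
    }
    /* On success the instruction is rewritten to its specialized form and
     * the counter is reused as a plain miss counter (see the Counters
     * comment in pycore_code.h). */
}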