Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Include/internal/pycore_tstate.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ typedef struct _PyJitTracerPreviousState {
PyCodeObject *instr_code; // Strong
struct _PyInterpreterFrame *instr_frame;
_PyBloomFilter dependencies;
int jump_backward_seen;
Comment thread
Fidget-Spinner marked this conversation as resolved.
Outdated
} _PyJitTracerPreviousState;

typedef struct _PyJitTracerState {
Expand Down
66 changes: 50 additions & 16 deletions Lib/test/test_capi/test_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ def iter_ops(ex):
def get_ops(ex):
    """Return everything produced by ``iter_ops(ex)`` as a list."""
    return [*iter_ops(ex)]

def count_ops(ex, name):
    """Return how many names yielded by ``iter_opnames(ex)`` equal ``name``."""
    return sum(1 for candidate in iter_opnames(ex) if candidate == name)


@requires_specialization
@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds")
Expand Down Expand Up @@ -1165,22 +1168,6 @@ def testfunc(n):
self.assertIsNotNone(ex)
self.assertIn("_FOR_ITER_TIER_TWO", get_opnames(ex))

# NOTE(review): the hunk header above (22 lines -> 6) indicates this whole
# test is deleted by the change; it is shown here without a "-" marker.
# The test is unconditionally skipped with the reason given in the decorator.
@unittest.skip("Tracing into generators currently isn't supported.")
def test_for_iter_gen(self):
# Generator that yields 0 .. n-1.
def gen(n):
for i in range(n):
yield i
# Sums the values produced by iterating the generator in a `for` loop.
def testfunc(n):
g = gen(n)
s = 0
for i in g:
s += i
return s
# Expects the correct sum, an executor, and _FOR_ITER_GEN_FRAME among its uop names.
res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
self.assertEqual(res, sum(range(TIER2_THRESHOLD)))
self.assertIsNotNone(ex)
self.assertIn("_FOR_ITER_GEN_FRAME", get_opnames(ex))

def test_modified_local_is_seen_by_optimized_code(self):
l = sys._getframe().f_locals
a = 1
Expand Down Expand Up @@ -3302,6 +3289,53 @@ def test_is_none(n):
self.assertIn("_POP_TOP_NOP", uops)
self.assertNotIn("_POP_TOP", uops)

# Asserts that the executor produced for a `for` loop over a generator
# contains _FOR_ITER_GEN_FRAME, _YIELD_VALUE and _JUMP_TO_TOP, plus at least
# three _POP_TOP_NOP uops.
def test_for_iter_gen_frame(self):
# Generator under test: yields i + i for every i in range(n).
def f(n):
for i in range(n):
# Should be optimized to POP_TOP_NOP
yield i + i
# Driver: exhausts f(n) and discards each value.
def testfunc(n):
for _ in f(n):
pass

# Runs with twice TIER2_THRESHOLD; `res` is not inspected below.
res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD*2)
self.assertIsNotNone(ex)
uops = get_opnames(ex)

self.assertIn("_FOR_ITER_GEN_FRAME", uops)
self.assertIn("_YIELD_VALUE", uops)
# It's essential for performance that the trace loops around.
self.assertIn("_JUMP_TO_TOP", uops)
# _POP_TOP_NOP is a sign the optimizer ran and didn't hit bottom.
self.assertGreaterEqual(count_ops(ex, "_POP_TOP_NOP"), 3)

def test_send_gen_frame(self):
    """Check the executor produced when a generator delegates via ``yield from``.

    The executor must contain _FOR_ITER_GEN_FRAME, _YIELD_VALUE,
    _SEND_GEN_FRAME and _JUMP_TO_TOP, plus at least two _POP_TOP_NOP uops.
    """

    def gen(n):
        for i in range(n):
            yield i + i

    def send_gen(n):
        yield from gen(n)

    def testfunc(n):
        for _ in send_gen(n):
            pass

    for _ in range(_testinternalcapi.SPECIALIZATION_THRESHOLD):
        # Ensure SEND is specialized to SEND_GEN.  The generator must be
        # iterated for that: calling send_gen() on its own only creates the
        # generator object and never executes the SEND instruction.
        for _value in send_gen(10):
            pass

    res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD*2)
    self.assertIsNotNone(ex)
    uops = get_opnames(ex)

    self.assertIn("_FOR_ITER_GEN_FRAME", uops)
    self.assertIn("_YIELD_VALUE", uops)
    self.assertIn("_SEND_GEN_FRAME", uops)
    # It's essential for performance that the trace loops around.
    self.assertIn("_JUMP_TO_TOP", uops)
    # _POP_TOP_NOP is a sign the optimizer ran and didn't hit bottom.
    self.assertGreaterEqual(count_ops(ex, "_POP_TOP_NOP"), 2)

def test_143026(self):
# https://github.com/python/cpython/issues/143026

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
The JIT optimizer now understands more generator instructions.
24 changes: 17 additions & 7 deletions Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -823,16 +823,25 @@ _PyJit_translate_single_bytecode_to_trace(
_tstate->jit_tracer_state.initial_state.exit == NULL &&
// These are coroutines, and we want to unroll those usually.
opcode != JUMP_BACKWARD_NO_INTERRUPT) {
// We encountered a JUMP_BACKWARD but not to the top of our own loop.
// We encountered a second JUMP_BACKWARD but not to the top of our own loop.
// We don't want to continue tracing as we might get stuck in the
// inner loop. Instead, end the trace where the executor of the
// inner loop might start and let the traces rejoin.
OPT_STAT_INC(inner_loop);
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
trace[trace_length-1].operand1 = true; // is_control_flow
DPRINTF(2, "JUMP_BACKWARD not to top ends trace %p %p %p\n", next_instr,
_tstate->jit_tracer_state.initial_state.close_loop_instr, _tstate->jit_tracer_state.initial_state.start_instr);
goto done;
if (_tstate->jit_tracer_state.prev_state.jump_backward_seen >= 1 ||
// Also end the trace early if we probably have no more space left, as it's better
// to link to another backwards jump trace.
trace_length >= (_tstate->jit_tracer_state.prev_state.code_max_size / 3)) {
OPT_STAT_INC(inner_loop);
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
trace[trace_length-1].operand1 = true; // is_control_flow
DPRINTF(2, "JUMP_BACKWARD not to top ends trace %p %p %p\n", next_instr,
_tstate->jit_tracer_state.initial_state.close_loop_instr, _tstate->jit_tracer_state.initial_state.start_instr);
goto done;
}
else {
assert(_tstate->jit_tracer_state.prev_state.jump_backward_seen == 0);
_tstate->jit_tracer_state.prev_state.jump_backward_seen++;
}
}
break;
}
Expand Down Expand Up @@ -1064,6 +1073,7 @@ _PyJit_TryInitializeTracing(
_tstate->jit_tracer_state.initial_state.exit = exit;
_tstate->jit_tracer_state.initial_state.stack_depth = curr_stackdepth;
_tstate->jit_tracer_state.initial_state.chain_depth = chain_depth;
_tstate->jit_tracer_state.prev_state.jump_backward_seen = 0;
_tstate->jit_tracer_state.prev_state.instr_frame = frame;
_tstate->jit_tracer_state.prev_state.dependencies_still_valid = true;
_tstate->jit_tracer_state.prev_state.instr_code = (PyCodeObject *)Py_NewRef(_PyFrame_GetCode(frame));
Expand Down
34 changes: 27 additions & 7 deletions Python/optimizer_bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -927,15 +927,35 @@ dummy_func(void) {
}

// Optimizer handling of _FOR_ITER_GEN_FRAME: builds an abstract frame for the
// code object looked up from the following _PUSH_FRAME uop and outputs it as
// gen_frame.
op(_FOR_ITER_GEN_FRAME, (unused, unused -- unused, unused, gen_frame)) {
// NOTE(review): this page shows a diff without +/- markers. The next three
// lines appear to be the removed implementation (the file header reports 7
// deletions); read literally they would end the trace unconditionally.
// Confirm against the real file.
gen_frame = PyJitRef_NULL;
/* We are about to hit the end of the trace */
ctx->done = true;
// The uop after this one must be the _PUSH_FRAME that enters the generator.
assert((this_instr + 1)->opcode == _PUSH_FRAME);
PyCodeObject *co = get_code_with_logging((this_instr + 1));
if (co == NULL) {
// No code object available: stop optimizing here.
ctx->done = true;
break;
}
_Py_UOpsAbstractFrame *new_frame = frame_new(ctx, co, 1, NULL, 0);
if (new_frame == NULL) {
// Could not create the abstract frame: stop optimizing here.
ctx->done = true;
break;
}
// Slot 0 of the new frame's stack is a constant None symbol
// (presumably the value the generator receives on resume -- TODO confirm).
new_frame->stack[0] = sym_new_const(ctx, Py_None);
gen_frame = PyJitRef_Wrap((JitOptSymbol *)new_frame);
}

// NOTE(review): this page shows a diff without +/- markers. The next four
// lines (old header plus three body lines) appear to be the removed version
// of this op; the replacement starts at the second op(_SEND_GEN_FRAME ...)
// header. Confirm against the real file.
op(_SEND_GEN_FRAME, (unused, unused -- unused, gen_frame)) {
gen_frame = PyJitRef_NULL;
// We are about to hit the end of the trace:
ctx->done = true;
// Optimizer handling of _SEND_GEN_FRAME: builds an abstract frame for the
// code object looked up from the following _PUSH_FRAME uop, seeds it with the
// sent value `v`, and outputs it as gen_frame.
op(_SEND_GEN_FRAME, (unused, v -- unused, gen_frame)) {
// The uop after this one must be the _PUSH_FRAME that enters the generator.
assert((this_instr + 1)->opcode == _PUSH_FRAME);
PyCodeObject *co = get_code_with_logging((this_instr + 1));
if (co == NULL) {
// No code object available: stop optimizing here.
ctx->done = true;
break;
}
_Py_UOpsAbstractFrame *new_frame = frame_new(ctx, co, 1, NULL, 0);
if (new_frame == NULL) {
// Could not create the abstract frame: stop optimizing here.
ctx->done = true;
break;
}
// Slot 0 of the new frame's stack is `v` with its reference info stripped.
new_frame->stack[0] = PyJitRef_StripReferenceInfo(v);
gen_frame = PyJitRef_Wrap((JitOptSymbol *)new_frame);
}

op(_CHECK_STACK_SPACE, (unused, unused, unused[oparg] -- unused, unused, unused[oparg])) {
Expand Down
32 changes: 28 additions & 4 deletions Python/optimizer_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading