Skip to content

Commit ef6ac24

Browse files
committed
address many reviews
1 parent 709c0a1 commit ef6ac24

File tree

6 files changed

+55
-165
lines changed

6 files changed

+55
-165
lines changed

Include/cpython/pystats.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,6 @@ typedef struct _optimization_stats {
145145
uint64_t trace_immediately_deopts;
146146
uint64_t executors_invalidated;
147147
uint64_t fitness_terminated_traces;
148-
uint64_t best_exit_fallback;
149148
UOpStats opcode[PYSTATS_MAX_UOP_ID + 1];
150149
uint64_t unsupported_opcode[256];
151150
uint64_t trace_length_hist[_Py_UOP_HIST_SIZE];

Include/internal/pycore_interp_structs.h

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -452,16 +452,6 @@ typedef struct _PyOptimizationConfig {
452452
// Trace fitness thresholds
453453
uint16_t fitness_initial;
454454
uint16_t fitness_initial_side;
455-
uint16_t fitness_per_instruction;
456-
uint16_t fitness_branch_biased;
457-
uint16_t fitness_branch_unbiased;
458-
uint16_t fitness_backward_edge;
459-
uint16_t fitness_frame_entry;
460-
461-
// Exit quality thresholds for fitness-based trace termination
462-
uint16_t exit_quality_enter_executor;
463-
uint16_t exit_quality_default;
464-
uint16_t exit_quality_specializable;
465455

466456
// Optimization flags
467457
bool specialization_enabled;

Include/internal/pycore_optimizer.h

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,18 +16,17 @@ extern "C" {
1616
#include <stdbool.h>
1717

1818
/* Default fitness configuration values for trace quality control.
19-
* These can be overridden via PYTHON_JIT_FITNESS_* environment variables. */
19+
* FITNESS_INITIAL and FITNESS_INITIAL_SIDE can be overridden via
20+
* the PYTHON_JIT_FITNESS_INITIAL and PYTHON_JIT_FITNESS_INITIAL_SIDE
* environment variables. */
2021
#define FITNESS_PER_INSTRUCTION 2
21-
#define FITNESS_INITIAL (UOP_MAX_TRACE_LENGTH * FITNESS_PER_INSTRUCTION)
22+
#define FITNESS_INITIAL 2000
2223
#define FITNESS_INITIAL_SIDE 800
23-
#define FITNESS_BRANCH_BIASED 5
24-
#define FITNESS_BRANCH_UNBIASED 25
24+
#define FITNESS_BRANCH_BASE 5
2525
#define FITNESS_BACKWARD_EDGE 80
26-
#define FITNESS_FRAME_ENTRY 10
2726

28-
/* Default exit quality constants for fitness-based trace termination.
29-
* Higher values mean better places to stop the trace.
30-
* These can be overridden via PYTHON_JIT_EXIT_QUALITY_* environment variables. */
27+
/* Exit quality constants for fitness-based trace termination.
28+
* Higher values mean better places to stop the trace. */
29+
#define EXIT_QUALITY_CLOSE_LOOP 800
3130
#define EXIT_QUALITY_ENTER_EXECUTOR 500
3231
#define EXIT_QUALITY_DEFAULT 200
3332
#define EXIT_QUALITY_SPECIALIZABLE 50
@@ -119,9 +118,6 @@ typedef struct _PyJitTracerPreviousState {
119118

120119
typedef struct _PyJitTracerTranslatorState {
121120
int32_t fitness; // Current trace fitness, starts high, decrements
122-
int32_t best_exit_quality; // Best exit quality seen so far
123-
int best_exit_buffer_pos; // Position in code_buffer of best exit (-1=none)
124-
uint32_t best_exit_target; // Bytecode target of best exit point
125121
int frame_depth; // Current inline depth (0 = root frame)
126122
} _PyJitTracerTranslatorState;
127123

Python/optimizer.c

Lines changed: 40 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -596,86 +596,45 @@ add_to_trace(
596596
((uint32_t)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive)))
597597

598598

599-
/* Compute branch bias from the 16-bit branch history register.
600-
* Returns 0 (completely unpredictable, 50/50) to 8 (fully biased). */
599+
/* Compute branch fitness penalty based on how likely the traced path is.
600+
* The penalty is small when the traced path is common, large when rare.
601+
* A branch that historically goes the other way gets a heavy penalty. */
601602
static inline int
602-
compute_branch_bias(uint16_t history)
603+
compute_branch_penalty(uint16_t history, bool branch_taken)
603604
{
604-
int ones = _Py_popcount32((uint32_t)history);
605-
return abs(ones - 8);
605+
int taken_count = _Py_popcount32((uint32_t)history);
606+
int on_trace_count = branch_taken ? taken_count : 16 - taken_count;
607+
int off_trace = 16 - on_trace_count;
608+
/* Quadratic scaling: off_trace^2 ranges from 0 (fully biased our way)
609+
* to 256 (fully biased against us: all 16 recorded outcomes went the
* other way; 15 of 16 against gives 225). */
610+
return FITNESS_BRANCH_BASE + off_trace * off_trace;
606611
}
607612

608613
/* Compute exit quality for the current trace position.
609-
* Higher values mean it's a better place to stop the trace. */
614+
* Higher values mean better places to stop the trace. */
610615
static inline int32_t
611616
compute_exit_quality(_Py_CODEUNIT *target_instr, int opcode,
612-
const _PyOptimizationConfig *cfg)
617+
const _PyJitTracerState *tracer)
613618
{
619+
if (target_instr == tracer->initial_state.start_instr ||
620+
target_instr == tracer->initial_state.close_loop_instr) {
621+
return EXIT_QUALITY_CLOSE_LOOP;
622+
}
614623
if (target_instr->op.code == ENTER_EXECUTOR) {
615-
return (int32_t)cfg->exit_quality_enter_executor;
624+
return EXIT_QUALITY_ENTER_EXECUTOR;
616625
}
617626
if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]] > 0) {
618-
return (int32_t)cfg->exit_quality_specializable;
619-
}
620-
return (int32_t)cfg->exit_quality_default;
621-
}
622-
623-
/* Try to truncate the trace to the best recorded exit point.
624-
* Returns 1 if successful, 0 if no valid best exit exists.
625-
* Enforces progress constraints: the fallback position must satisfy
626-
* the minimum trace length requirements. */
627-
static inline int
628-
try_best_exit_fallback(
629-
_PyJitUopBuffer *trace,
630-
_PyJitTracerTranslatorState *ts,
631-
bool progress_needed)
632-
{
633-
int best_pos = ts->best_exit_buffer_pos;
634-
if (best_pos <= 0) {
635-
return 0;
636-
} else if (progress_needed && best_pos <= CODE_SIZE_NO_PROGRESS) {
637-
return 0;
638-
} else if (!progress_needed && best_pos <= CODE_SIZE_EMPTY) {
639-
return 0;
627+
return EXIT_QUALITY_SPECIALIZABLE;
640628
}
641-
trace->next = trace->start + best_pos;
642-
/* Caller must add terminator (_EXIT_TRACE) after this */
643-
return 1;
629+
return EXIT_QUALITY_DEFAULT;
644630
}
645631

646-
/* Update trace fitness after translating one bytecode instruction. */
647-
static inline void
648-
update_trace_fitness(
649-
_PyJitTracerTranslatorState *ts,
650-
int opcode,
651-
_Py_CODEUNIT *target_instr,
652-
const _PyOptimizationConfig *cfg)
632+
static inline int32_t
633+
compute_frame_penalty(const _PyOptimizationConfig *cfg)
653634
{
654-
ts->fitness -= cfg->fitness_per_instruction;
655-
656-
switch (opcode) {
657-
case POP_JUMP_IF_FALSE:
658-
case POP_JUMP_IF_TRUE:
659-
case POP_JUMP_IF_NONE:
660-
case POP_JUMP_IF_NOT_NONE: {
661-
int bias = compute_branch_bias(target_instr[1].cache);
662-
/* Linear interpolation: bias 0 → unbiased penalty, bias 8 → biased penalty */
663-
int penalty = cfg->fitness_branch_unbiased
664-
- (bias * (cfg->fitness_branch_unbiased - cfg->fitness_branch_biased)) / 8;
665-
ts->fitness -= penalty;
666-
break;
667-
}
668-
case JUMP_BACKWARD:
669-
case JUMP_BACKWARD_JIT:
670-
case JUMP_BACKWARD_NO_JIT:
671-
ts->fitness -= cfg->fitness_backward_edge;
672-
break;
673-
default:
674-
break;
675-
}
635+
return (int32_t)cfg->fitness_initial / 5 + 1;
676636
}
677637

678-
679638
static int
680639
is_terminator(const _PyUOpInstruction *uop)
681640
{
@@ -812,20 +771,6 @@ _PyJit_translate_single_bytecode_to_trace(
812771
DPRINTF(2, "Unsupported: oparg too large\n");
813772
unsupported:
814773
{
815-
// If we have a high-quality best_exit (enter_executor, etc.),
816-
// prefer it over rewinding to last _SET_IP — this covers the
817-
// main unsupported path, not just the edge case.
818-
_PyJitTracerTranslatorState *ts_unsup = &tracer->translator_state;
819-
if (ts_unsup->best_exit_quality > (int32_t)tstate->interp->opt_config.exit_quality_default &&
820-
try_best_exit_fallback(trace, ts_unsup, progress_needed)) {
821-
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, ts_unsup->best_exit_target);
822-
uop_buffer_last(trace)->operand1 = true; // is_control_flow
823-
OPT_STAT_INC(best_exit_fallback);
824-
DPRINTF(2, "Best-exit fallback at unsupported (pos=%d, quality=%d)\n",
825-
ts_unsup->best_exit_buffer_pos, ts_unsup->best_exit_quality);
826-
goto done;
827-
}
828-
// Fall back: rewind to last _SET_IP and replace with _DEOPT.
829774
_PyUOpInstruction *curr = uop_buffer_last(trace);
830775
while (curr->opcode != _SET_IP && uop_buffer_length(trace) > 2) {
831776
trace->next--;
@@ -855,31 +800,13 @@ _PyJit_translate_single_bytecode_to_trace(
855800

856801
// Fitness-based trace quality check (before reserving space for this instruction)
857802
_PyJitTracerTranslatorState *ts = &tracer->translator_state;
858-
int32_t eq = compute_exit_quality(target_instr, opcode,
859-
&tstate->interp->opt_config);
860-
861-
// Record best exit candidate.
862-
// Only record after minimum progress to avoid truncating to near-empty traces.
863-
if (eq > ts->best_exit_quality &&
864-
uop_buffer_length(trace) > CODE_SIZE_NO_PROGRESS) {
865-
ts->best_exit_quality = eq;
866-
ts->best_exit_buffer_pos = uop_buffer_length(trace);
867-
ts->best_exit_target = target;
868-
}
803+
int32_t eq = compute_exit_quality(target_instr, opcode, tracer);
869804

870805
// Check if fitness is depleted — should we stop the trace?
871-
if (ts->fitness < eq &&
872-
!(progress_needed && uop_buffer_length(trace) < CODE_SIZE_NO_PROGRESS)) {
873-
// Prefer stopping at the best recorded exit point
874-
if (try_best_exit_fallback(trace, ts, progress_needed)) {
875-
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, ts->best_exit_target);
876-
uop_buffer_last(trace)->operand1 = true; // is_control_flow
877-
}
878-
else {
879-
// No valid best exit — stop at current position
880-
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
881-
uop_buffer_last(trace)->operand1 = true; // is_control_flow
882-
}
806+
if (ts->fitness < eq) {
807+
// This is a tracer heuristic rather than normal program control flow,
808+
// so leave operand1 clear and let the resulting side exit increase chain_depth.
809+
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
883810
OPT_STAT_INC(fitness_terminated_traces);
884811
DPRINTF(2, "Fitness terminated: fitness=%d < exit_quality=%d\n",
885812
ts->fitness, eq);
@@ -916,12 +843,6 @@ _PyJit_translate_single_bytecode_to_trace(
916843
DPRINTF(2, "No room for expansions and guards (need %d, got %d)\n",
917844
space_needed, uop_buffer_remaining_space(trace));
918845
OPT_STAT_INC(trace_too_long);
919-
// Try best-exit fallback before giving up
920-
if (try_best_exit_fallback(trace, &tracer->translator_state, progress_needed)) {
921-
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, tracer->translator_state.best_exit_target);
922-
uop_buffer_last(trace)->operand1 = true; // is_control_flow
923-
OPT_STAT_INC(best_exit_fallback);
924-
}
925846
goto done;
926847
}
927848

@@ -945,13 +866,16 @@ _PyJit_translate_single_bytecode_to_trace(
945866
assert(jump_happened ? (next_instr == computed_jump_instr) : (next_instr == computed_next_instr));
946867
uint32_t uopcode = BRANCH_TO_GUARD[opcode - POP_JUMP_IF_FALSE][jump_happened];
947868
ADD_TO_TRACE(uopcode, 0, 0, INSTR_IP(jump_happened ? computed_next_instr : computed_jump_instr, old_code));
869+
tracer->translator_state.fitness -= compute_branch_penalty(
870+
target_instr[1].cache, jump_happened);
948871
break;
949872
}
950873
case JUMP_BACKWARD_JIT:
951874
// This is possible as the JIT might have re-activated after it was disabled
952875
case JUMP_BACKWARD_NO_JIT:
953876
case JUMP_BACKWARD:
954877
ADD_TO_TRACE(_CHECK_PERIODIC, 0, 0, target);
878+
tracer->translator_state.fitness -= FITNESS_BACKWARD_EDGE;
955879
_Py_FALLTHROUGH;
956880
case JUMP_BACKWARD_NO_INTERRUPT:
957881
{
@@ -1084,15 +1008,19 @@ _PyJit_translate_single_bytecode_to_trace(
10841008
ts_depth->frame_depth);
10851009
goto unsupported;
10861010
}
1087-
int32_t penalty = (int32_t)tstate->interp->opt_config.fitness_frame_entry
1088-
* ts_depth->frame_depth;
1089-
ts_depth->fitness -= penalty;
1011+
int32_t frame_penalty = compute_frame_penalty(&tstate->interp->opt_config);
1012+
ts_depth->fitness -= frame_penalty * ts_depth->frame_depth;
10901013
}
10911014
else if (uop == _RETURN_VALUE || uop == _RETURN_GENERATOR || uop == _YIELD_VALUE) {
10921015
_PyJitTracerTranslatorState *ts_depth = &tracer->translator_state;
1016+
int32_t frame_penalty = compute_frame_penalty(&tstate->interp->opt_config);
10931017
if (ts_depth->frame_depth <= 0) {
1094-
// Underflow
1095-
ts_depth->fitness -= (int32_t)tstate->interp->opt_config.fitness_frame_entry * 2;
1018+
// Underflow: returning from a frame we didn't enter
1019+
ts_depth->fitness -= frame_penalty * 2;
1020+
}
1021+
else {
1022+
// Reward returning: small inlined calls should be encouraged
1023+
ts_depth->fitness += frame_penalty;
10961024
}
10971025
ts_depth->frame_depth = ts_depth->frame_depth <= 0 ? 0 : ts_depth->frame_depth - 1;
10981026
}
@@ -1140,8 +1068,7 @@ _PyJit_translate_single_bytecode_to_trace(
11401068
// Update fitness AFTER translation, BEFORE returning to continue tracing.
11411069
// This ensures the next iteration's fitness check reflects the cost of
11421070
// all instructions translated so far.
1143-
update_trace_fitness(&tracer->translator_state, opcode, target_instr,
1144-
&tstate->interp->opt_config);
1071+
tracer->translator_state.fitness -= FITNESS_PER_INSTRUCTION;
11451072
DPRINTF(2, "Trace continuing (fitness=%d)\n", tracer->translator_state.fitness);
11461073
return 1;
11471074
done:
@@ -1232,9 +1159,6 @@ _PyJit_TryInitializeTracing(
12321159
ts->fitness = is_side_trace
12331160
? (int32_t)cfg->fitness_initial_side
12341161
: (int32_t)cfg->fitness_initial;
1235-
ts->best_exit_quality = 0;
1236-
ts->best_exit_buffer_pos = -1;
1237-
ts->best_exit_target = 0;
12381162
ts->frame_depth = 0;
12391163

12401164
tracer->is_tracing = true;

Python/pystate.c

Lines changed: 8 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -642,32 +642,14 @@ init_interpreter(PyInterpreterState *interp,
642642
init_policy(&interp->opt_config.fitness_initial_side,
643643
"PYTHON_JIT_FITNESS_INITIAL_SIDE",
644644
FITNESS_INITIAL_SIDE, 50, 5000);
645-
init_policy(&interp->opt_config.fitness_per_instruction,
646-
"PYTHON_JIT_FITNESS_PER_INSTRUCTION",
647-
FITNESS_PER_INSTRUCTION, 0, 100);
648-
init_policy(&interp->opt_config.fitness_branch_biased,
649-
"PYTHON_JIT_FITNESS_BRANCH_BIASED",
650-
FITNESS_BRANCH_BIASED, 0, 500);
651-
init_policy(&interp->opt_config.fitness_branch_unbiased,
652-
"PYTHON_JIT_FITNESS_BRANCH_UNBIASED",
653-
FITNESS_BRANCH_UNBIASED, 0, 500);
654-
init_policy(&interp->opt_config.fitness_backward_edge,
655-
"PYTHON_JIT_FITNESS_BACKWARD_EDGE",
656-
FITNESS_BACKWARD_EDGE, 0, 1000);
657-
init_policy(&interp->opt_config.fitness_frame_entry,
658-
"PYTHON_JIT_FITNESS_FRAME_ENTRY",
659-
FITNESS_FRAME_ENTRY, 0, 1000);
660-
661-
// Exit quality thresholds
662-
init_policy(&interp->opt_config.exit_quality_enter_executor,
663-
"PYTHON_JIT_EXIT_QUALITY_ENTER_EXECUTOR",
664-
EXIT_QUALITY_ENTER_EXECUTOR, 0, 10000);
665-
init_policy(&interp->opt_config.exit_quality_default,
666-
"PYTHON_JIT_EXIT_QUALITY_DEFAULT",
667-
EXIT_QUALITY_DEFAULT, 0, 10000);
668-
init_policy(&interp->opt_config.exit_quality_specializable,
669-
"PYTHON_JIT_EXIT_QUALITY_SPECIALIZABLE",
670-
EXIT_QUALITY_SPECIALIZABLE, 0, 10000);
645+
/* The tracer starts at start_instr, so initial fitness must not be below
646+
* the close-loop exit quality or tracing will terminate immediately. */
647+
if (interp->opt_config.fitness_initial < EXIT_QUALITY_CLOSE_LOOP) {
648+
interp->opt_config.fitness_initial = EXIT_QUALITY_CLOSE_LOOP;
649+
}
650+
if (interp->opt_config.fitness_initial_side < EXIT_QUALITY_CLOSE_LOOP) {
651+
interp->opt_config.fitness_initial_side = EXIT_QUALITY_CLOSE_LOOP;
652+
}
671653

672654
interp->opt_config.specialization_enabled = !is_env_enabled("PYTHON_SPECIALIZATION_OFF");
673655
interp->opt_config.uops_optimize_enabled = !is_env_disabled("PYTHON_UOPS_OPTIMIZE");

Python/pystats.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,6 @@ print_optimization_stats(FILE *out, OptimizationStats *stats)
275275
fprintf(out, "Optimization unknown callee: %" PRIu64 "\n", stats->unknown_callee);
276276
fprintf(out, "Executors invalidated: %" PRIu64 "\n", stats->executors_invalidated);
277277
fprintf(out, "Optimization fitness terminated: %" PRIu64 "\n", stats->fitness_terminated_traces);
278-
fprintf(out, "Optimization best exit fallback: %" PRIu64 "\n", stats->best_exit_fallback);
279278

280279
print_histogram(out, "Trace length", stats->trace_length_hist);
281280
print_histogram(out, "Trace run length", stats->trace_run_length_hist);

0 commit comments

Comments
 (0)