2828
2929#define MAX_EXECUTORS_SIZE 256
3030
31+ _Static_assert (FITNESS_INITIAL < UOP_MAX_TRACE_LENGTH ,
32+ "FITNESS_INITIAL must be less than UOP_MAX_TRACE_LENGTH so that "
33+ "per-slot fitness charging guarantees the buffer never overflows" );
34+
3135// Trace too short, no progress:
3236// _START_EXECUTOR
3337// _MAKE_WARM
@@ -596,19 +600,15 @@ add_to_trace(
596600 ((uint32_t)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive)))
597601
598602
599- /* Compute branch fitness penalty based on how likely the traced path is.
600- * The penalty is small when the traced path is common, large when rare.
601- * A branch that historically goes the other way gets a heavy penalty. */
603+ /* Branch penalty: 0 if the history is fully biased toward the traced direction,
604+ * FITNESS_BRANCH_BALANCED if 50/50, 2*FITNESS_BRANCH_BALANCED if fully against it. */
602605static inline int
603606compute_branch_penalty (uint16_t history , bool branch_taken )
604607{
605608 int taken_count = _Py_popcount32 ((uint32_t )history );
606609 int on_trace_count = branch_taken ? taken_count : 16 - taken_count ;
607610 int off_trace = 16 - on_trace_count ;
608- /* Linear scaling: off_trace ranges from 0 (fully biased our way)
609- * to 16 (fully biased against us), so the penalty ranges from
610- * FITNESS_BRANCH_BASE to FITNESS_BRANCH_BASE + 32. */
611- return FITNESS_BRANCH_BASE + off_trace * 2 ;
611+ return off_trace * FITNESS_BRANCH_BALANCED / 8 ;
612612}
613613
614614/* Compute exit quality for the current trace position.
@@ -630,10 +630,11 @@ compute_exit_quality(_Py_CODEUNIT *target_instr, int opcode,
630630 return EXIT_QUALITY_DEFAULT ;
631631}
632632
633+ /* Frame penalty: sized so that (MAX_ABSTRACT_FRAME_DEPTH-1) unreturned pushes cost more than fitness_initial. */
633634static inline int32_t
634- compute_frame_penalty (const _PyOptimizationConfig * cfg )
635+ compute_frame_penalty (uint16_t fitness_initial )
635636{
636- return (int32_t )cfg -> fitness_initial / 30 + 1 ;
637+ return (int32_t )fitness_initial / ( MAX_ABSTRACT_FRAME_DEPTH - 1 ) + 1 ;
637638}
638639
639640static int
@@ -799,23 +800,26 @@ _PyJit_translate_single_bytecode_to_trace(
799800 return 1 ;
800801 }
801802
802- // Fitness-based trace quality check (before reserving space for this instruction)
803+ // Stop the trace if fitness has dropped below the exit quality threshold.
803804 _PyJitTracerTranslatorState * ts = & tracer -> translator_state ;
804805 int32_t eq = compute_exit_quality (target_instr , opcode , tracer );
805806 DPRINTF (3 , "Fitness check: %s(%d) fitness=%d, exit_quality=%d, depth=%d\n" ,
806807 _PyOpcode_OpName [opcode ], oparg , ts -> fitness , eq , ts -> frame_depth );
807808
808- // Check if fitness is depleted — should we stop the trace?
809809 if (ts -> fitness < eq ) {
810- // This is a tracer heuristic rather than normal program control flow,
811- // so leave operand1 clear and let the resulting side exit increase chain_depth.
810+ // Heuristic exit: leave operand1=0 so the side exit increments chain_depth.
812811 ADD_TO_TRACE (_EXIT_TRACE , 0 , 0 , target );
813812 OPT_STAT_INC (fitness_terminated_traces );
814813 DPRINTF (2 , "Fitness terminated: %s(%d) fitness=%d < exit_quality=%d\n" ,
815814 _PyOpcode_OpName [opcode ], oparg , ts -> fitness , eq );
816815 goto done ;
817816 }
818817
818+ // Snapshot the buffer before reserving tail slots. The later charge
819+ // includes both emitted uops and capacity reserved for exits/deopts/errors.
820+ _PyUOpInstruction * next_before = trace -> next ;
821+ _PyUOpInstruction * end_before = trace -> end ;
822+
819823 // One for possible _DEOPT, one because _CHECK_VALIDITY itself might _DEOPT
820824 trace -> end -= 2 ;
821825
@@ -880,29 +884,13 @@ _PyJit_translate_single_bytecode_to_trace(
880884 tracer -> translator_state .fitness -= FITNESS_BACKWARD_EDGE ;
881885 DPRINTF (3 , " backward edge penalty: -%d -> fitness=%d\n" ,
882886 FITNESS_BACKWARD_EDGE , tracer -> translator_state .fitness );
883- _Py_FALLTHROUGH ;
887+ break ;
884888 case JUMP_BACKWARD_NO_INTERRUPT :
885- {
886- if ((next_instr != tracer -> initial_state .close_loop_instr ) &&
887- (next_instr != tracer -> initial_state .start_instr ) &&
888- uop_buffer_length (& tracer -> code_buffer ) > CODE_SIZE_NO_PROGRESS &&
889- // For side exits, we don't want to terminate them early.
890- tracer -> initial_state .exit == NULL &&
891- // These are coroutines, and we want to unroll those usually.
892- opcode != JUMP_BACKWARD_NO_INTERRUPT ) {
893- // We encountered a JUMP_BACKWARD but not to the top of our own loop.
894- // We don't want to continue tracing as we might get stuck in the
895- // inner loop. Instead, end the trace where the executor of the
896- // inner loop might start and let the traces rejoin.
897- OPT_STAT_INC (inner_loop );
898- ADD_TO_TRACE (_EXIT_TRACE , 0 , 0 , target );
899- uop_buffer_last (trace )-> operand1 = true; // is_control_flow
900- DPRINTF (2 , "JUMP_BACKWARD not to top ends trace %p %p %p\n" , next_instr ,
901- tracer -> initial_state .close_loop_instr , tracer -> initial_state .start_instr );
902- goto done ;
903- }
889+ tracer -> translator_state .fitness -= FITNESS_BACKWARD_EDGE_COROUTINE ;
890+ DPRINTF (3 , " coroutine backward edge penalty: -%d -> fitness=%d\n" ,
891+ FITNESS_BACKWARD_EDGE_COROUTINE ,
892+ tracer -> translator_state .fitness );
904893 break ;
905- }
906894
907895 case RESUME :
908896 case RESUME_CHECK :
@@ -1006,32 +994,31 @@ _PyJit_translate_single_bytecode_to_trace(
1006994 _PyJitTracerTranslatorState * ts_depth = & tracer -> translator_state ;
1007995 ts_depth -> frame_depth ++ ;
1008996 assert (ts_depth -> frame_depth < MAX_ABSTRACT_FRAME_DEPTH );
1009- int32_t frame_penalty = compute_frame_penalty (& tstate -> interp -> opt_config );
1010- int32_t cost = frame_penalty * ts_depth -> frame_depth ;
1011- ts_depth -> fitness -= cost ;
1012- DPRINTF (3 , " _PUSH_FRAME: depth=%d, penalty=-%d (per_frame=%d) -> fitness=%d\n" ,
1013- ts_depth -> frame_depth , cost , frame_penalty ,
997+ int32_t frame_penalty = compute_frame_penalty (tstate -> interp -> opt_config .fitness_initial );
998+ ts_depth -> fitness -= frame_penalty ;
999+ DPRINTF (3 , " _PUSH_FRAME: depth=%d, penalty=-%d -> fitness=%d\n" ,
1000+ ts_depth -> frame_depth , frame_penalty ,
10141001 ts_depth -> fitness );
10151002 }
10161003 else if (uop == _RETURN_VALUE || uop == _RETURN_GENERATOR || uop == _YIELD_VALUE ) {
10171004 _PyJitTracerTranslatorState * ts_depth = & tracer -> translator_state ;
1018- int32_t frame_penalty = compute_frame_penalty (& tstate -> interp -> opt_config );
1005+ int32_t frame_penalty = compute_frame_penalty (tstate -> interp -> opt_config . fitness_initial );
10191006 if (ts_depth -> frame_depth <= 0 ) {
1020- // Underflow: returning from a frame we didn't enter
1021- ts_depth -> fitness -= frame_penalty * 2 ;
1007+ // Returning from a frame we didn't enter — penalize.
1008+ ts_depth -> fitness -= frame_penalty ;
10221009 DPRINTF (3 , " %s: underflow penalty=-%d -> fitness=%d\n" ,
1023- _PyOpcode_uop_name [uop ], frame_penalty * 2 ,
1010+ _PyOpcode_uop_name [uop ], frame_penalty ,
10241011 ts_depth -> fitness );
10251012 }
10261013 else {
1027- // Reward returning: small inlined calls should be encouraged
1014+ // Symmetric with push: net-zero frame impact.
10281015 ts_depth -> fitness += frame_penalty ;
1016+ ts_depth -> frame_depth -- ;
10291017 DPRINTF (3 , " %s: return reward=+%d, depth=%d -> fitness=%d\n" ,
10301018 _PyOpcode_uop_name [uop ], frame_penalty ,
1031- ts_depth -> frame_depth - 1 ,
1019+ ts_depth -> frame_depth ,
10321020 ts_depth -> fitness );
10331021 }
1034- ts_depth -> frame_depth = ts_depth -> frame_depth <= 0 ? 0 : ts_depth -> frame_depth - 1 ;
10351022 }
10361023 else if (_PyUop_Flags [uop ] & HAS_RECORDS_VALUE_FLAG ) {
10371024 PyObject * recorded_value = tracer -> prev_state .recorded_value ;
@@ -1074,12 +1061,17 @@ _PyJit_translate_single_bytecode_to_trace(
10741061 ADD_TO_TRACE (_JUMP_TO_TOP , 0 , 0 , 0 );
10751062 goto done ;
10761063 }
1077- // Update fitness AFTER translation, BEFORE returning to continue tracing.
1078- // This ensures the next iteration's fitness check reflects the cost of
1079- // all instructions translated so far.
1080- tracer -> translator_state .fitness -= FITNESS_PER_INSTRUCTION ;
1081- DPRINTF (3 , " per-insn cost: -%d -> fitness=%d\n" ,
1082- FITNESS_PER_INSTRUCTION , tracer -> translator_state .fitness );
1064+ // Charge fitness by trace-buffer capacity consumed for this bytecode,
1065+ // including both emitted uops and tail reservations.
1066+ {
1067+ int32_t slots_fwd = (int32_t )(trace -> next - next_before );
1068+ int32_t slots_rev = (int32_t )(end_before - trace -> end );
1069+ int32_t slots_used = slots_fwd + slots_rev ;
1070+ tracer -> translator_state .fitness -= slots_used ;
1071+ DPRINTF (3 , " per-insn cost: -%d (fwd=%d, rev=%d) -> fitness=%d\n" ,
1072+ slots_used , slots_fwd , slots_rev ,
1073+ tracer -> translator_state .fitness );
1074+ }
10831075 DPRINTF (2 , "Trace continuing (fitness=%d)\n" , tracer -> translator_state .fitness );
10841076 return 1 ;
10851077done :
@@ -1163,16 +1155,15 @@ _PyJit_TryInitializeTracing(
11631155 assert (curr_instr -> op .code == JUMP_BACKWARD_JIT || curr_instr -> op .code == RESUME_CHECK_JIT || (exit != NULL ));
11641156 tracer -> initial_state .jump_backward_instr = curr_instr ;
11651157
1166- // Initialize fitness tracking state
1158+ // Reduce side-trace fitness as chain depth grows, but clamp the reduction
1159+ // after depth 4 so deeply chained exits still have at least half budget.
11671160 const _PyOptimizationConfig * cfg = & tstate -> interp -> opt_config ;
11681161 _PyJitTracerTranslatorState * ts = & tracer -> translator_state ;
1169- bool is_side_trace = (exit != NULL );
1170- ts -> fitness = is_side_trace
1171- ? (int32_t )cfg -> fitness_initial_side
1172- : (int32_t )cfg -> fitness_initial ;
1162+ int effective_depth = Py_MIN (chain_depth , 4 );
1163+ ts -> fitness = (int32_t )((8 - effective_depth ) * cfg -> fitness_initial / 8 );
11731164 ts -> frame_depth = 0 ;
1174- DPRINTF (3 , "Fitness init: %s trace , fitness=%d\n" ,
1175- is_side_trace ? "side" : "root" , ts -> fitness );
1165+ DPRINTF (3 , "Fitness init: chain_depth=%d , fitness=%d\n" ,
1166+ chain_depth , ts -> fitness );
11761167
11771168 tracer -> is_tracing = true;
11781169 return 1 ;
0 commit comments