@@ -549,8 +549,11 @@ dynamic_exit_uop[MAX_UOP_ID + 1] = {
549549};
550550
551551
552- #define CONFIDENCE_RANGE 1000
553- #define CONFIDENCE_CUTOFF 333
552+ /* Exit quality constants for fitness-based trace termination.
553+ * Higher values mean better places to stop the trace. */
554+ #define EXIT_QUALITY_ENTER_EXECUTOR 500 // An executor already exists here
555+ #define EXIT_QUALITY_DEFAULT 200 // Ordinary bytecode position
556+ #define EXIT_QUALITY_SPECIALIZABLE 50 // Specializable instruction — avoid stopping here
554557
555558#ifdef Py_DEBUG
556559#define DPRINTF (level , ...) \
@@ -598,6 +601,86 @@ add_to_trace(
598601 ((uint32_t)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive)))
599602
600603
/* Measure how biased a branch is from its 16-bit branch history register.
 * A perfectly unpredictable branch (eight taken, eight not-taken bits)
 * scores 0; a branch that always goes the same way scores 8. */
static inline int
compute_branch_bias(uint16_t history)
{
    int delta = _Py_popcount32((uint32_t)history) - 8;
    return delta < 0 ? -delta : delta;
}
612+
613+ /* Compute exit quality for the current trace position.
614+ * Higher values mean it's a better place to stop the trace. */
615+ static inline int32_t
616+ compute_exit_quality (_Py_CODEUNIT * target_instr , int opcode )
617+ {
618+ if (target_instr -> op .code == ENTER_EXECUTOR ) {
619+ return EXIT_QUALITY_ENTER_EXECUTOR ;
620+ }
621+ if (_PyOpcode_Caches [_PyOpcode_Deopt [opcode ]] > 0 ) {
622+ return EXIT_QUALITY_SPECIALIZABLE ;
623+ }
624+ return EXIT_QUALITY_DEFAULT ;
625+ }
626+
627+ /* Try to truncate the trace to the best recorded exit point.
628+ * Returns 1 if successful, 0 if no valid best exit exists.
629+ * Enforces progress constraints: the fallback position must satisfy
630+ * the minimum trace length requirements. */
631+ static inline int
632+ try_best_exit_fallback (
633+ _PyJitUopBuffer * trace ,
634+ _PyJitTracerTranslatorState * ts ,
635+ bool progress_needed )
636+ {
637+ int best_pos = ts -> best_exit_buffer_pos ;
638+ if (best_pos <= 0 ) {
639+ return 0 ;
640+ } else if (progress_needed && best_pos <= CODE_SIZE_NO_PROGRESS ) {
641+ return 0 ;
642+ } else if (!progress_needed && best_pos <= CODE_SIZE_EMPTY ) {
643+ return 0 ;
644+ }
645+ trace -> next = trace -> start + best_pos ;
646+ /* Caller must add terminator (_EXIT_TRACE) after this */
647+ return 1 ;
648+ }
649+
650+ /* Update trace fitness after translating one bytecode instruction. */
651+ static inline void
652+ update_trace_fitness (
653+ _PyJitTracerTranslatorState * ts ,
654+ int opcode ,
655+ _Py_CODEUNIT * target_instr ,
656+ const _PyOptimizationConfig * cfg )
657+ {
658+ ts -> fitness -= cfg -> fitness_per_instruction ;
659+
660+ switch (opcode ) {
661+ case POP_JUMP_IF_FALSE :
662+ case POP_JUMP_IF_TRUE :
663+ case POP_JUMP_IF_NONE :
664+ case POP_JUMP_IF_NOT_NONE : {
665+ int bias = compute_branch_bias (target_instr [1 ].cache );
666+ /* Linear interpolation: bias 0 → unbiased penalty, bias 8 → biased penalty */
667+ int penalty = cfg -> fitness_branch_unbiased
668+ - (bias * (cfg -> fitness_branch_unbiased - cfg -> fitness_branch_biased )) / 8 ;
669+ ts -> fitness -= penalty ;
670+ break ;
671+ }
672+ case JUMP_BACKWARD :
673+ case JUMP_BACKWARD_JIT :
674+ case JUMP_BACKWARD_NO_JIT :
675+ ts -> fitness -= cfg -> fitness_backward_edge ;
676+ break ;
677+ /* JUMP_BACKWARD_NO_INTERRUPT: exempt from backward edge penalty (coroutines) */
678+ default :
679+ break ;
680+ }
681+ }
682+
683+
601684static int
602685is_terminator (const _PyUOpInstruction * uop )
603686{
@@ -730,17 +813,46 @@ _PyJit_translate_single_bytecode_to_trace(
730813 goto unsupported ;
731814 }
732815
816+ // Track frame depth changes for fitness (only for supported frame transitions)
817+ if (frame != tracer -> prev_state .instr_frame ) {
818+ _PyJitTracerTranslatorState * ts_depth = & tracer -> translator_state ;
819+ if (frame -> previous == tracer -> prev_state .instr_frame ) {
820+ // Entered a deeper frame (function call inlined)
821+ ts_depth -> frame_depth ++ ;
822+ // Penalty scales with depth: shallow inlining is cheap,
823+ // deep inlining gets progressively more expensive.
824+ int32_t penalty = (int32_t )tstate -> interp -> opt_config .fitness_frame_entry
825+ * ts_depth -> frame_depth ;
826+ ts_depth -> fitness -= penalty ;
827+ } else if (ts_depth -> frame_depth > 0 ) {
828+ // Returned to a shallower frame
829+ ts_depth -> frame_depth -- ;
830+ }
831+ }
832+
733833 if (oparg > 0xFFFF ) {
734834 DPRINTF (2 , "Unsupported: oparg too large\n" );
735835 unsupported :
736836 {
737- // Rewind to previous instruction and replace with _EXIT_TRACE.
837+ // If we have a high-quality best_exit (enter_executor, etc.),
838+ // prefer it over rewinding to last _SET_IP — this covers the
839+ // main unsupported path, not just the edge case.
840+ _PyJitTracerTranslatorState * ts_unsup = & tracer -> translator_state ;
841+ if (ts_unsup -> best_exit_quality > EXIT_QUALITY_DEFAULT &&
842+ try_best_exit_fallback (trace , ts_unsup , progress_needed )) {
843+ ADD_TO_TRACE (_EXIT_TRACE , 0 , 0 , ts_unsup -> best_exit_target );
844+ uop_buffer_last (trace )-> operand1 = true; // is_control_flow
845+ OPT_STAT_INC (best_exit_fallback );
846+ DPRINTF (2 , "Best-exit fallback at unsupported (pos=%d, quality=%d)\n" ,
847+ ts_unsup -> best_exit_buffer_pos , ts_unsup -> best_exit_quality );
848+ goto done ;
849+ }
850+ // Fall back: rewind to last _SET_IP and replace with _DEOPT.
738851 _PyUOpInstruction * curr = uop_buffer_last (trace );
739852 while (curr -> opcode != _SET_IP && uop_buffer_length (trace ) > 2 ) {
740853 trace -> next -- ;
741854 curr = uop_buffer_last (trace );
742855 }
743- assert (curr -> opcode == _SET_IP || uop_buffer_length (trace ) == 2 );
744856 if (curr -> opcode == _SET_IP ) {
745857 int32_t old_target = (int32_t )uop_get_target (curr );
746858 curr -> opcode = _DEOPT ;
@@ -763,6 +875,40 @@ _PyJit_translate_single_bytecode_to_trace(
763875 return 1 ;
764876 }
765877
878+ // Fitness-based trace quality check (before reserving space for this instruction)
879+ {
880+ _PyJitTracerTranslatorState * ts = & tracer -> translator_state ;
881+ int32_t eq = compute_exit_quality (target_instr , opcode );
882+
883+ // Record best exit candidate.
884+ // Only record after minimum progress to avoid truncating to near-empty traces.
885+ if (eq > ts -> best_exit_quality &&
886+ uop_buffer_length (trace ) > CODE_SIZE_NO_PROGRESS ) {
887+ ts -> best_exit_quality = eq ;
888+ ts -> best_exit_buffer_pos = uop_buffer_length (trace );
889+ ts -> best_exit_target = target ;
890+ }
891+
892+ // Check if fitness is depleted — should we stop the trace?
893+ if (ts -> fitness < eq &&
894+ !(progress_needed && uop_buffer_length (trace ) < CODE_SIZE_NO_PROGRESS )) {
895+ // Prefer stopping at the best recorded exit point
896+ if (try_best_exit_fallback (trace , ts , progress_needed )) {
897+ ADD_TO_TRACE (_EXIT_TRACE , 0 , 0 , ts -> best_exit_target );
898+ uop_buffer_last (trace )-> operand1 = true; // is_control_flow
899+ }
900+ else {
901+ // No valid best exit — stop at current position
902+ ADD_TO_TRACE (_EXIT_TRACE , 0 , 0 , target );
903+ uop_buffer_last (trace )-> operand1 = true; // is_control_flow
904+ }
905+ OPT_STAT_INC (fitness_terminated_traces );
906+ DPRINTF (2 , "Fitness terminated: fitness=%d < exit_quality=%d\n" ,
907+ ts -> fitness , eq );
908+ goto done ;
909+ }
910+ }
911+
766912 // One for possible _DEOPT, one because _CHECK_VALIDITY itself might _DEOPT
767913 trace -> end -= 2 ;
768914
@@ -793,6 +939,12 @@ _PyJit_translate_single_bytecode_to_trace(
793939 DPRINTF (2 , "No room for expansions and guards (need %d, got %d)\n" ,
794940 space_needed , uop_buffer_remaining_space (trace ));
795941 OPT_STAT_INC (trace_too_long );
942+ // Try best-exit fallback before giving up
943+ if (try_best_exit_fallback (trace , & tracer -> translator_state , progress_needed )) {
944+ ADD_TO_TRACE (_EXIT_TRACE , 0 , 0 , tracer -> translator_state .best_exit_target );
945+ uop_buffer_last (trace )-> operand1 = true; // is_control_flow
946+ OPT_STAT_INC (best_exit_fallback );
947+ }
796948 goto done ;
797949 }
798950
@@ -986,7 +1138,12 @@ _PyJit_translate_single_bytecode_to_trace(
9861138 ADD_TO_TRACE (_JUMP_TO_TOP , 0 , 0 , 0 );
9871139 goto done ;
9881140 }
989- DPRINTF (2 , "Trace continuing\n" );
1141+ // Update fitness AFTER translation, BEFORE returning to continue tracing.
1142+ // This ensures the next iteration's fitness check reflects the cost of
1143+ // all instructions translated so far.
1144+ update_trace_fitness (& tracer -> translator_state , opcode , target_instr ,
1145+ & tstate -> interp -> opt_config );
1146+ DPRINTF (2 , "Trace continuing (fitness=%d)\n" , tracer -> translator_state .fitness );
9901147 return 1 ;
9911148done :
9921149 DPRINTF (2 , "Trace done\n" );
@@ -1069,6 +1226,18 @@ _PyJit_TryInitializeTracing(
10691226 assert (curr_instr -> op .code == JUMP_BACKWARD_JIT || curr_instr -> op .code == RESUME_CHECK_JIT || (exit != NULL ));
10701227 tracer -> initial_state .jump_backward_instr = curr_instr ;
10711228
1229+ // Initialize fitness tracking state
1230+ const _PyOptimizationConfig * cfg = & tstate -> interp -> opt_config ;
1231+ _PyJitTracerTranslatorState * ts = & tracer -> translator_state ;
1232+ bool is_side_trace = (exit != NULL );
1233+ ts -> fitness = is_side_trace
1234+ ? (int32_t )cfg -> fitness_initial_side
1235+ : (int32_t )cfg -> fitness_initial ;
1236+ ts -> best_exit_quality = 0 ;
1237+ ts -> best_exit_buffer_pos = -1 ;
1238+ ts -> best_exit_target = 0 ;
1239+ ts -> frame_depth = 0 ;
1240+
10721241 tracer -> is_tracing = true;
10731242 return 1 ;
10741243}
0 commit comments