@@ -596,86 +596,45 @@ add_to_trace(
596596 ((uint32_t)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive)))
597597
598598
599- /* Compute branch bias from the 16-bit branch history register.
600- * Returns 0 (completely unpredictable, 50/50) to 8 (fully biased). */
599+ /* Compute branch fitness penalty based on how likely the traced path is.
600+ * The penalty is small when the traced path is common, large when rare.
601+ * A branch that historically goes the other way gets a heavy penalty. */
601602static inline int
602- compute_branch_bias (uint16_t history )
603+ compute_branch_penalty (uint16_t history , bool branch_taken )
603604{
604- int ones = _Py_popcount32 ((uint32_t )history );
605- return abs (ones - 8 );
605+ int taken_count = _Py_popcount32 ((uint32_t )history );
606+ int on_trace_count = branch_taken ? taken_count : 16 - taken_count ;
607+ int off_trace = 16 - on_trace_count ;
608+ /* Quadratic scaling: off_trace^2 ranges from 0 (fully biased our way)
609+ * to 256 (fully biased against us, e.g. 16/16 left but traced right). */
610+ return FITNESS_BRANCH_BASE + off_trace * off_trace ;
606611}
607612
608613/* Compute exit quality for the current trace position.
609- * Higher values mean it's a better place to stop the trace. */
614+ * Higher values mean better places to stop the trace. */
610615static inline int32_t
611616compute_exit_quality (_Py_CODEUNIT * target_instr , int opcode ,
612- const _PyOptimizationConfig * cfg )
617+ const _PyJitTracerState * tracer )
613618{
619+ if (target_instr == tracer -> initial_state .start_instr ||
620+ target_instr == tracer -> initial_state .close_loop_instr ) {
621+ return EXIT_QUALITY_CLOSE_LOOP ;
622+ }
614623 if (target_instr -> op .code == ENTER_EXECUTOR ) {
615- return ( int32_t ) cfg -> exit_quality_enter_executor ;
624+ return EXIT_QUALITY_ENTER_EXECUTOR ;
616625 }
617626 if (_PyOpcode_Caches [_PyOpcode_Deopt [opcode ]] > 0 ) {
618- return (int32_t )cfg -> exit_quality_specializable ;
619- }
620- return (int32_t )cfg -> exit_quality_default ;
621- }
622-
623- /* Try to truncate the trace to the best recorded exit point.
624- * Returns 1 if successful, 0 if no valid best exit exists.
625- * Enforces progress constraints: the fallback position must satisfy
626- * the minimum trace length requirements. */
627- static inline int
628- try_best_exit_fallback (
629- _PyJitUopBuffer * trace ,
630- _PyJitTracerTranslatorState * ts ,
631- bool progress_needed )
632- {
633- int best_pos = ts -> best_exit_buffer_pos ;
634- if (best_pos <= 0 ) {
635- return 0 ;
636- } else if (progress_needed && best_pos <= CODE_SIZE_NO_PROGRESS ) {
637- return 0 ;
638- } else if (!progress_needed && best_pos <= CODE_SIZE_EMPTY ) {
639- return 0 ;
627+ return EXIT_QUALITY_SPECIALIZABLE ;
640628 }
641- trace -> next = trace -> start + best_pos ;
642- /* Caller must add terminator (_EXIT_TRACE) after this */
643- return 1 ;
629+ return EXIT_QUALITY_DEFAULT ;
644630}
645631
646- /* Update trace fitness after translating one bytecode instruction. */
647- static inline void
648- update_trace_fitness (
649- _PyJitTracerTranslatorState * ts ,
650- int opcode ,
651- _Py_CODEUNIT * target_instr ,
652- const _PyOptimizationConfig * cfg )
632+ static inline int32_t
633+ compute_frame_penalty (const _PyOptimizationConfig * cfg )
653634{
654- ts -> fitness -= cfg -> fitness_per_instruction ;
655-
656- switch (opcode ) {
657- case POP_JUMP_IF_FALSE :
658- case POP_JUMP_IF_TRUE :
659- case POP_JUMP_IF_NONE :
660- case POP_JUMP_IF_NOT_NONE : {
661- int bias = compute_branch_bias (target_instr [1 ].cache );
662- /* Linear interpolation: bias 0 → unbiased penalty, bias 8 → biased penalty */
663- int penalty = cfg -> fitness_branch_unbiased
664- - (bias * (cfg -> fitness_branch_unbiased - cfg -> fitness_branch_biased )) / 8 ;
665- ts -> fitness -= penalty ;
666- break ;
667- }
668- case JUMP_BACKWARD :
669- case JUMP_BACKWARD_JIT :
670- case JUMP_BACKWARD_NO_JIT :
671- ts -> fitness -= cfg -> fitness_backward_edge ;
672- break ;
673- default :
674- break ;
675- }
635+ return (int32_t )cfg -> fitness_initial / 5 + 1 ;
676636}
677637
678-
679638static int
680639is_terminator (const _PyUOpInstruction * uop )
681640{
@@ -812,20 +771,6 @@ _PyJit_translate_single_bytecode_to_trace(
812771 DPRINTF (2 , "Unsupported: oparg too large\n" );
813772 unsupported :
814773 {
815- // If we have a high-quality best_exit (enter_executor, etc.),
816- // prefer it over rewinding to last _SET_IP — this covers the
817- // main unsupported path, not just the edge case.
818- _PyJitTracerTranslatorState * ts_unsup = & tracer -> translator_state ;
819- if (ts_unsup -> best_exit_quality > (int32_t )tstate -> interp -> opt_config .exit_quality_default &&
820- try_best_exit_fallback (trace , ts_unsup , progress_needed )) {
821- ADD_TO_TRACE (_EXIT_TRACE , 0 , 0 , ts_unsup -> best_exit_target );
822- uop_buffer_last (trace )-> operand1 = true; // is_control_flow
823- OPT_STAT_INC (best_exit_fallback );
824- DPRINTF (2 , "Best-exit fallback at unsupported (pos=%d, quality=%d)\n" ,
825- ts_unsup -> best_exit_buffer_pos , ts_unsup -> best_exit_quality );
826- goto done ;
827- }
828- // Fall back: rewind to last _SET_IP and replace with _DEOPT.
829774 _PyUOpInstruction * curr = uop_buffer_last (trace );
830775 while (curr -> opcode != _SET_IP && uop_buffer_length (trace ) > 2 ) {
831776 trace -> next -- ;
@@ -855,31 +800,13 @@ _PyJit_translate_single_bytecode_to_trace(
855800
856801 // Fitness-based trace quality check (before reserving space for this instruction)
857802 _PyJitTracerTranslatorState * ts = & tracer -> translator_state ;
858- int32_t eq = compute_exit_quality (target_instr , opcode ,
859- & tstate -> interp -> opt_config );
860-
861- // Record best exit candidate.
862- // Only record after minimum progress to avoid truncating to near-empty traces.
863- if (eq > ts -> best_exit_quality &&
864- uop_buffer_length (trace ) > CODE_SIZE_NO_PROGRESS ) {
865- ts -> best_exit_quality = eq ;
866- ts -> best_exit_buffer_pos = uop_buffer_length (trace );
867- ts -> best_exit_target = target ;
868- }
803+ int32_t eq = compute_exit_quality (target_instr , opcode , tracer );
869804
870805 // Check if fitness is depleted — should we stop the trace?
871- if (ts -> fitness < eq &&
872- !(progress_needed && uop_buffer_length (trace ) < CODE_SIZE_NO_PROGRESS )) {
873- // Prefer stopping at the best recorded exit point
874- if (try_best_exit_fallback (trace , ts , progress_needed )) {
875- ADD_TO_TRACE (_EXIT_TRACE , 0 , 0 , ts -> best_exit_target );
876- uop_buffer_last (trace )-> operand1 = true; // is_control_flow
877- }
878- else {
879- // No valid best exit — stop at current position
880- ADD_TO_TRACE (_EXIT_TRACE , 0 , 0 , target );
881- uop_buffer_last (trace )-> operand1 = true; // is_control_flow
882- }
806+ if (ts -> fitness < eq ) {
807+ // This is a tracer heuristic rather than normal program control flow,
808+ // so leave operand1 clear and let the resulting side exit increase chain_depth.
809+ ADD_TO_TRACE (_EXIT_TRACE , 0 , 0 , target );
883810 OPT_STAT_INC (fitness_terminated_traces );
884811 DPRINTF (2 , "Fitness terminated: fitness=%d < exit_quality=%d\n" ,
885812 ts -> fitness , eq );
@@ -916,12 +843,6 @@ _PyJit_translate_single_bytecode_to_trace(
916843 DPRINTF (2 , "No room for expansions and guards (need %d, got %d)\n" ,
917844 space_needed , uop_buffer_remaining_space (trace ));
918845 OPT_STAT_INC (trace_too_long );
919- // Try best-exit fallback before giving up
920- if (try_best_exit_fallback (trace , & tracer -> translator_state , progress_needed )) {
921- ADD_TO_TRACE (_EXIT_TRACE , 0 , 0 , tracer -> translator_state .best_exit_target );
922- uop_buffer_last (trace )-> operand1 = true; // is_control_flow
923- OPT_STAT_INC (best_exit_fallback );
924- }
925846 goto done ;
926847 }
927848
@@ -945,13 +866,16 @@ _PyJit_translate_single_bytecode_to_trace(
945866 assert (jump_happened ? (next_instr == computed_jump_instr ) : (next_instr == computed_next_instr ));
946867 uint32_t uopcode = BRANCH_TO_GUARD [opcode - POP_JUMP_IF_FALSE ][jump_happened ];
947868 ADD_TO_TRACE (uopcode , 0 , 0 , INSTR_IP (jump_happened ? computed_next_instr : computed_jump_instr , old_code ));
869+ tracer -> translator_state .fitness -= compute_branch_penalty (
870+ target_instr [1 ].cache , jump_happened );
948871 break ;
949872 }
950873 case JUMP_BACKWARD_JIT :
951874 // This is possible as the JIT might have re-activated after it was disabled
952875 case JUMP_BACKWARD_NO_JIT :
953876 case JUMP_BACKWARD :
954877 ADD_TO_TRACE (_CHECK_PERIODIC , 0 , 0 , target );
878+ tracer -> translator_state .fitness -= FITNESS_BACKWARD_EDGE ;
955879 _Py_FALLTHROUGH ;
956880 case JUMP_BACKWARD_NO_INTERRUPT :
957881 {
@@ -1084,15 +1008,19 @@ _PyJit_translate_single_bytecode_to_trace(
10841008 ts_depth -> frame_depth );
10851009 goto unsupported ;
10861010 }
1087- int32_t penalty = (int32_t )tstate -> interp -> opt_config .fitness_frame_entry
1088- * ts_depth -> frame_depth ;
1089- ts_depth -> fitness -= penalty ;
1011+ int32_t frame_penalty = compute_frame_penalty (& tstate -> interp -> opt_config );
1012+ ts_depth -> fitness -= frame_penalty * ts_depth -> frame_depth ;
10901013 }
10911014 else if (uop == _RETURN_VALUE || uop == _RETURN_GENERATOR || uop == _YIELD_VALUE ) {
10921015 _PyJitTracerTranslatorState * ts_depth = & tracer -> translator_state ;
1016+ int32_t frame_penalty = compute_frame_penalty (& tstate -> interp -> opt_config );
10931017 if (ts_depth -> frame_depth <= 0 ) {
1094- // Underflow
1095- ts_depth -> fitness -= (int32_t )tstate -> interp -> opt_config .fitness_frame_entry * 2 ;
1018+ // Underflow: returning from a frame we didn't enter
1019+ ts_depth -> fitness -= frame_penalty * 2 ;
1020+ }
1021+ else {
1022+ // Reward returning: small inlined calls should be encouraged
1023+ ts_depth -> fitness += frame_penalty ;
10961024 }
10971025 ts_depth -> frame_depth = ts_depth -> frame_depth <= 0 ? 0 : ts_depth -> frame_depth - 1 ;
10981026 }
@@ -1140,8 +1068,7 @@ _PyJit_translate_single_bytecode_to_trace(
11401068 // Update fitness AFTER translation, BEFORE returning to continue tracing.
11411069 // This ensures the next iteration's fitness check reflects the cost of
11421070 // all instructions translated so far.
1143- update_trace_fitness (& tracer -> translator_state , opcode , target_instr ,
1144- & tstate -> interp -> opt_config );
1071+ tracer -> translator_state .fitness -= FITNESS_PER_INSTRUCTION ;
11451072 DPRINTF (2 , "Trace continuing (fitness=%d)\n" , tracer -> translator_state .fitness );
11461073 return 1 ;
11471074done :
@@ -1232,9 +1159,6 @@ _PyJit_TryInitializeTracing(
12321159 ts -> fitness = is_side_trace
12331160 ? (int32_t )cfg -> fitness_initial_side
12341161 : (int32_t )cfg -> fitness_initial ;
1235- ts -> best_exit_quality = 0 ;
1236- ts -> best_exit_buffer_pos = -1 ;
1237- ts -> best_exit_target = 0 ;
12381162 ts -> frame_depth = 0 ;
12391163
12401164 tracer -> is_tracing = true;
0 commit comments