@@ -498,13 +498,14 @@ fn process_container<'py>(
498498
499499 // ── String leaf ──
500500 //
501- // Fast path: check Python string length via O(1) PyAny::len() BEFORE
502- // extracting to a Rust String (which copies all bytes). If the string
503- // is under both the char and token limits, and there's no min constraint,
504- // we can skip the extraction entirely.
505- if container. is_instance_of :: < PyString > ( ) {
506- // Python str.__len__() returns the number of code points — equivalent
507- // to Rust chars().count() for BMP, and close enough for our limits.
501+ // Optimization layers (cheapest first):
502+ // 1. O(1) Python len() pre-check — skip entirely for under-limit strings
503+ // 2. PyString::to_str() zero-copy borrow — no String allocation
504+ // 3. count_chars_capped() — O(limit) char counting
505+ // 4. Skip is_numeric_string for strings > 50 bytes
506+ // 5. byte_offset_of_char() for direct &str slicing
507+ if let Ok ( py_str) = container. cast :: < PyString > ( ) {
508+ // ── Layer 1: O(1) pre-check via Python str.__len__() ──
508509 let py_len = container. len ( ) . unwrap_or ( 0 ) ;
509510
510511 let needs_processing = match cfg. limit_mode {
@@ -521,22 +522,20 @@ fn process_container<'py>(
521522 }
522523 } ;
523524
524- // If the string doesn't need processing AND strategy is truncate
525- // (so no violation to report), skip the expensive extraction.
526- if !needs_processing && cfg. strategy == Strategy :: Truncate {
525+ // Under-limit strings need no further work regardless of strategy
526+ if !needs_processing {
527527 return Ok ( ProcessResult :: Unchanged ) ;
528528 }
529- }
530529
531- if let Ok ( text) = container. extract :: < String > ( ) {
532- // Skip numeric strings
533- if is_numeric_string ( & text) {
530+ // ── Layer 2: zero-copy borrow ──
531+ let text: & str = py_str. to_str ( ) ?;
532+
533+ // Skip numeric check for long strings (no number is > 50 chars)
534+ if text. len ( ) <= 50 && is_numeric_string ( text) {
534535 return Ok ( ProcessResult :: Unchanged ) ;
535536 }
536537
537- // Use capped char counting — O(limit) instead of O(n).
538- // For token mode, count (max_tokens+1)*cpt chars so the over-limit
539- // check `token_count > max_tokens` works correctly.
538+ // ── Layer 3: capped char counting ──
540539 let limit_for_counting = match cfg. limit_mode {
541540 LimitMode :: Character => cfg. max_chars . unwrap_or ( usize:: MAX ) ,
542541 LimitMode :: Token => cfg
@@ -545,10 +544,9 @@ fn process_container<'py>(
545544 . saturating_add ( 1 )
546545 . saturating_mul ( cfg. chars_per_token ) ,
547546 } ;
548- let ( char_count, _) = count_chars_capped ( & text, limit_for_counting) ;
547+ let ( char_count, _) = count_chars_capped ( text, limit_for_counting) ;
549548 let token_count = estimate_tokens ( char_count, cfg. chars_per_token ) ;
550549
551- // Determine if out of bounds based on limit_mode
552550 let ( below_min, above_max) = match cfg. limit_mode {
553551 LimitMode :: Character => {
554552 let below = cfg. min_chars > 0 && char_count < cfg. min_chars ;
@@ -566,19 +564,30 @@ fn process_container<'py>(
566564 return Ok ( ProcessResult :: Unchanged ) ;
567565 }
568566
569- // Block strategy
567+ // ── Block strategy ──
570568 if cfg. strategy == Strategy :: Block {
571569 let location = if path. is_empty ( ) {
572570 String :: new ( )
573571 } else {
574572 format ! ( " at {path}" )
575573 } ;
576574
577- let preview = if text. chars ( ) . count ( ) > 50 {
578- let p: String = text. chars ( ) . take ( 50 ) . collect ( ) ;
579- p + "..."
575+ let preview = if text. len ( ) > 53 {
576+ let end = text
577+ . char_indices ( )
578+ . nth ( 50 )
579+ . map_or ( text. len ( ) , |( off, _) | off) ;
580+ format ! ( "{}..." , & text[ ..end] )
580581 } else {
581- text. clone ( )
582+ text. to_string ( )
583+ } ;
584+
585+ let loc_str = || {
586+ if path. is_empty ( ) {
587+ "root" . to_string ( )
588+ } else {
589+ path. to_string ( )
590+ }
582591 } ;
583592
584593 let ( reason, description, code, details) = if cfg. limit_mode == LimitMode :: Token
@@ -599,14 +608,7 @@ fn process_container<'py>(
599608 "strategy" . to_string ( ) ,
600609 ViolationValue :: Str ( "block" . to_string ( ) ) ,
601610 ) ;
602- d. insert (
603- "location" . to_string ( ) ,
604- ViolationValue :: Str ( if path. is_empty ( ) {
605- "root" . to_string ( )
606- } else {
607- path. to_string ( )
608- } ) ,
609- ) ;
611+ d. insert ( "location" . to_string ( ) , ViolationValue :: Str ( loc_str ( ) ) ) ;
610612 d. insert ( "value_preview" . to_string ( ) , ViolationValue :: Str ( preview) ) ;
611613 (
612614 format ! ( "Output length out of bounds{location}" ) ,
@@ -623,14 +625,7 @@ fn process_container<'py>(
623625 "strategy" . to_string ( ) ,
624626 ViolationValue :: Str ( "block" . to_string ( ) ) ,
625627 ) ;
626- d. insert (
627- "location" . to_string ( ) ,
628- ViolationValue :: Str ( if path. is_empty ( ) {
629- "root" . to_string ( )
630- } else {
631- path. to_string ( )
632- } ) ,
633- ) ;
628+ d. insert ( "location" . to_string ( ) , ViolationValue :: Str ( loc_str ( ) ) ) ;
634629 d. insert ( "value_preview" . to_string ( ) , ViolationValue :: Str ( preview) ) ;
635630 (
636631 format ! ( "Output length out of bounds{location}" ) ,
@@ -639,7 +634,6 @@ fn process_container<'py>(
639634 d,
640635 )
641636 } else {
642- // Min violation
643637 let mut d = HashMap :: new ( ) ;
644638 d. insert ( "length" . to_string ( ) , ViolationValue :: Int ( char_count as i64 ) ) ;
645639 d. insert (
@@ -654,14 +648,7 @@ fn process_container<'py>(
654648 "min_tokens" . to_string ( ) ,
655649 ViolationValue :: Int ( cfg. min_tokens as i64 ) ,
656650 ) ;
657- d. insert (
658- "location" . to_string ( ) ,
659- ViolationValue :: Str ( if path. is_empty ( ) {
660- "root" . to_string ( )
661- } else {
662- path. to_string ( )
663- } ) ,
664- ) ;
651+ d. insert ( "location" . to_string ( ) , ViolationValue :: Str ( loc_str ( ) ) ) ;
665652 (
666653 format ! ( "Output length out of bounds{location}" ) ,
667654 format ! (
@@ -682,12 +669,12 @@ fn process_container<'py>(
682669 } ) ) ;
683670 }
684671
685- // Truncate strategy — only truncate if above max
672+ // ── Truncate: only if above max ──
686673 if above_max {
687- let truncated = truncate ( & text, cfg) ;
674+ let truncated = truncate ( text, cfg) ;
688675 if truncated != text {
689- let py_str = PyString :: new ( py, & truncated) ;
690- return Ok ( ProcessResult :: Modified ( py_str . into_any ( ) ) ) ;
676+ let new_py_str = PyString :: new ( py, & truncated) ;
677+ return Ok ( ProcessResult :: Modified ( new_py_str . into_any ( ) ) ) ;
691678 }
692679 }
693680
0 commit comments