@@ -270,7 +270,7 @@ def _get_matches_with_fallback(
270270 List of delimiter matches
271271 """
272272 governor .check_timeout ()
273- matches = self ._find_delimiter_matches (content )
273+ matches = self ._find_delimiter_matches (content , governor = governor )
274274
275275 if not matches :
276276 matches = self ._fallback_paragraph_chunking (content )
@@ -340,14 +340,17 @@ def _enforce_chunk_limit(self, chunks: list[CodeChunk], file_path: Path | None)
340340 file_path = str (file_path ) if file_path else None ,
341341 )
342342
343- def _find_delimiter_matches (self , content : str ) -> list [DelimiterMatch ]:
343+ def _find_delimiter_matches (
344+ self , content : str , * , governor : Any | None = None
345+ ) -> list [DelimiterMatch ]:
344346 """Find all delimiter matches in content using two-phase matching.
345347
346348 Phase 1: Matches explicit start/end pairs (e.g., {...}, (...))
347349 Phase 2: Matches keyword delimiters with empty ends (e.g., function, def, class)
348350
349351 Args:
350352 content: Source code to scan
353+ governor: Optional resource governor for timeout checks between phases
351354
352355 Returns:
353356 List of DelimiterMatch objects ordered by position
@@ -364,6 +367,10 @@ def _find_delimiter_matches(self, content: str) -> list[DelimiterMatch]:
364367 # Phase 1: Handle explicit start/end pairs (existing logic)
365368 matches .extend (self ._match_explicit_delimiters (content , explicit_delimiters ))
366369
370+ # Check timeout between phases to avoid unbounded work
371+ if governor is not None :
372+ governor .check_timeout ()
373+
367374 # Phase 2: Handle keyword delimiters with empty ends
368375 matches .extend (self ._match_keyword_delimiters (content , keyword_delimiters ))
369376
@@ -475,9 +482,17 @@ def _match_keyword_delimiters(
475482 for delimiter in keyword_delimiters :
476483 delimiter_map .setdefault (delimiter .start , []).append (delimiter )
477484
478- for match in combined_pattern .finditer (content ):
479- matched_text = match .group (0 )
480- keyword_pos = match .start ()
485+ keyword_hits = [
486+ (match .start (), match .group (0 )) for match in combined_pattern .finditer (content )
487+ ]
488+ # Precompute brace-nesting levels at all keyword positions in a single
489+ # O(n) forward pass. The previous approach called _calculate_nesting_level
490+ # per keyword match, each scanning from position 0, resulting in O(n * m)
491+ # total work that caused timeouts on large files (especially Python 3.13).
492+ keyword_positions = [pos for pos , _ in keyword_hits ]
493+ nesting_at = self ._precompute_nesting_levels (content , keyword_positions )
494+
495+ for keyword_pos , matched_text in keyword_hits :
481496
482497 # Skip if keyword is inside a string or comment
483498 if self ._is_inside_string_or_comment (content , keyword_pos ):
@@ -501,8 +516,8 @@ def _match_keyword_delimiters(
501516 )
502517
503518 if struct_end is not None :
504- # Calculate nesting level by counting parent structures
505- nesting_level = self . _calculate_nesting_level ( content , keyword_pos )
519+ # Look up precomputed nesting level (O(1) per keyword)
520+ nesting_level = nesting_at . get ( keyword_pos , 0 )
506521
507522 # Create a complete match from keyword to closing structure
508523 # This represents the entire construct (e.g., function...})
@@ -517,6 +532,84 @@ def _match_keyword_delimiters(
517532
518533 return matches
519534
535+ def _precompute_nesting_levels (
536+ self , content : str , positions : list [int ]
537+ ) -> dict [int , int ]:
538+ """Precompute brace-nesting levels at given positions in a single forward pass.
539+
540+ Replaces per-position calls to ``_calculate_nesting_level`` which each
541+ scanned from position 0, yielding O(n * m) total work. This method
542+ achieves the same result in O(n + m) by walking the content once and
543+ recording the running brace depth at each requested position.
544+
545+ Args:
546+ content: Source code
547+ positions: Character offsets whose nesting level is needed
548+
549+ Returns:
550+ Mapping from position to nesting level (0 = top-level)
551+ """
552+ if not positions :
553+ return {}
554+
555+ result : dict [int , int ] = {}
556+ sorted_positions = sorted (positions )
557+ pos_idx = 0
558+ brace_depth = 0
559+ content_len = len (content )
560+ pos = 0
561+ string_state = StringParseState (in_string = False , delimiter = None )
562+
563+ while pos < content_len :
564+ # Record nesting level for every target position we have reached
565+ while pos_idx < len (sorted_positions ) and sorted_positions [pos_idx ] <= pos :
566+ result [sorted_positions [pos_idx ]] = brace_depth
567+ pos_idx += 1
568+
569+ if pos_idx >= len (sorted_positions ):
570+ break # All positions recorded
571+
572+ pos , brace_depth , string_state = self ._advance_nesting_state (
573+ content , pos , content_len , brace_depth , string_state
574+ )
575+
576+ # Any remaining positions beyond the end of content
577+ for p in sorted_positions [pos_idx :]:
578+ result [p ] = brace_depth
579+
580+ return result
581+
582+ def _advance_nesting_state (
583+ self ,
584+ content : str ,
585+ pos : int ,
586+ content_len : int ,
587+ brace_depth : int ,
588+ string_state : StringParseState ,
589+ ) -> tuple [int , int , StringParseState ]:
590+ """Advance parsing state for nesting-level precomputation."""
591+ char = content [pos ]
592+
593+ # Track string boundaries
594+ if self ._is_string_boundary (char ):
595+ string_state = self ._update_string_state (content , pos , char , string_state )
596+
597+ if string_state .in_string :
598+ return pos + 1 , brace_depth , string_state
599+
600+ comment_skip = self ._skip_comment (content , pos , content_len )
601+ if comment_skip is not None :
602+ if comment_skip == - 1 :
603+ return content_len , brace_depth , string_state
604+ return comment_skip , brace_depth , string_state
605+
606+ if char == "{" :
607+ brace_depth += 1
608+ elif char == "}" :
609+ brace_depth = max (0 , brace_depth - 1 )
610+
611+ return pos + 1 , brace_depth , string_state
612+
520613 def _calculate_nesting_level (self , content : str , pos : int ) -> int :
521614 """Calculate nesting level at a given position by counting braces.
522615
@@ -527,45 +620,10 @@ def _calculate_nesting_level(self, content: str, pos: int) -> int:
527620 Returns:
528621 Nesting level (0 = top level, 1+ = nested)
529622 """
530- # Count opening and closing braces before this position
531- # Ignore braces in strings and comments
532- brace_depth = 0
533- i = 0
534- in_string = False
535- string_char = None
536-
537- while i < pos :
538- c = content [i ]
539-
540- # Handle strings
541- if c in ('"' , "'" , "`" ) and (i == 0 or content [i - 1 ] != "\\ " ):
542- if not in_string :
543- in_string = True
544- string_char = c
545- elif c == string_char :
546- in_string = False
547- string_char = None
548-
549- # Handle comments (simplified - just check for // and /*)
550- elif not in_string :
551- if content [i : i + 2 ] == "//" :
552- # Skip to end of line
553- next_newline = content .find ("\n " , i )
554- i = next_newline if next_newline >= 0 else len (content )
555- continue
556- if content [i : i + 2 ] == "/*" :
557- # Skip to end of comment
558- end_comment = content .find ("*/" , i + 2 )
559- i = end_comment + 2 if end_comment >= 0 else len (content )
560- continue
561- if c == "{" :
562- brace_depth += 1
563- elif c == "}" :
564- brace_depth = max (0 , brace_depth - 1 )
565-
566- i += 1
623+ if pos <= 0 :
624+ return 0
567625
568- return brace_depth
626+ return self . _precompute_nesting_levels ( content , [ pos ]). get ( pos , 0 )
569627
570628 def _find_next_structural_with_char (
571629 self , content : str , start : int , allowed : frozenset [str ]
@@ -660,13 +718,14 @@ def _skip_comment(self, content: str, pos: int, content_len: int) -> int | None:
660718 Returns:
661719 New position after comment, -1 if comment to EOF, None if no comment
662720 """
663- if pos + 1 >= content_len :
721+ if pos >= content_len :
664722 return None
665723
666724 two_chars = content [pos : pos + 2 ]
725+ char = content [pos ]
667726
668727 # Line comments
669- if two_chars in ( "//" , "#" ) :
728+ if two_chars == "//" or char == "#" :
670729 newline_pos = content .find ("\n " , pos )
671730 return - 1 if newline_pos == - 1 else newline_pos + 1
672731 # Block comments
@@ -833,12 +892,13 @@ def _skip_comment_in_matching(self, content: str, pos: int, content_len: int) ->
833892 Returns:
834893 New position, -1 if end reached, None if no comment
835894 """
836- if pos + 1 >= content_len :
895+ if pos >= content_len :
837896 return None
838897
839898 two_chars = content [pos : pos + 2 ]
899+ char = content [pos ]
840900
841- if two_chars in ( "//" , "#" ) :
901+ if two_chars == "//" or char == "#" :
842902 newline = content .find ("\n " , pos )
843903 return - 1 if newline == - 1 else newline
844904 if two_chars == "/*" :
0 commit comments