Complete PR google#257 integration + update cherry-pick tracker

IgnatG · IgnatG · commit 56ed04fcff6e · 2026-02-17T12:20:16.000Z
- annotation.py: add retry_utils import, retry params to annotate_text() signature and docstring, pass-through to annotate_documents()
- extraction.py: retry params in extract() signature + retry_kwargs dict
- gemini.py: retry_chunk_processing decorator import
- CHERRY_PICK_TRACKER.md: mark PRs google#350 and google#257 as applied, add log entries

Upstream: google#257
diff --git a/CHERRY_PICK_TRACKER.md b/CHERRY_PICK_TRACKER.md
@@ -36,8 +36,8 @@ These fixes are already part of the fork's `main` branch:
 
 | Status | PR | Title | Impact | Notes |
 |--------|----|-------|--------|-------|
-| [ ] | [#350](https://github.com/google/langextract/pull/350) | Fix incorrect `char_interval` for non-ASCII text (Fixes #334) | Fixes `RegexTokenizer` merging Latin + CJK characters. Fork already has #284 which may overlap. | Draft. Check if #334 is still reproducible. |
-| [ ] | [#257](https://github.com/google/langextract/pull/257) | Add retry mechanism for transient API errors (503, 429, timeouts) | Exponential backoff for LLM API failures. Useful but large (XL, 997 lines), no reviews. | Consider implementing simpler retry in our worker instead. |
+| [x] | [#350](https://github.com/google/langextract/pull/350) | Fix incorrect `char_interval` for non-ASCII text (Fixes #334) | Fixes `RegexTokenizer` merging Latin + CJK characters. Uses regex V1 set subtraction to separate CJK scripts from Latin in token patterns. | Applied manually. Adds `_CJK_SCRIPTS`, `_CJK_PATTERN`, and modifies `_LETTERS_PATTERN` with V1 set subtraction. 142→421 tests pass (new retry tests included). |
+| [x] | [#257](https://github.com/google/langextract/pull/257) | Add retry mechanism for transient API errors (503, 429, timeouts) | Exponential backoff with jitter for transient LLM failures. Chunk-level retry in annotation pipeline preserves successful chunks. | Applied via `git apply --reject` + manual conflict resolution. New files: `retry_utils.py` (278 lines), `retry_utils_test.py` (300 lines). Modified: `annotation.py`, `extraction.py`, `gemini.py`, `annotation_test.py`. Complementary to litellm's provider-level `num_retries`. |
 | [ ] | [#356](https://github.com/google/langextract/pull/356) | Remove duplicate `model_id` assignment in `factory.create_model()` | Tiny cleanup (XS). Low risk, easy cherry-pick. | |
 | [ ] | [#32](https://github.com/google/langextract/pull/32) | Multi-language tokenizer support | **Already in fork** as #284. | Skip — already included. |
 
@@ -71,6 +71,10 @@ These fixes are already part of the fork's `main` branch:
 |------|----|---------|--------|--------|
 | 2026-02-17 | #351/#349 | Manual apply | factory.py: moved `load_builtins_once()`/`load_plugins_once()` before provider conditional | main |
 | 2026-02-17 | #327 | Manual apply | extraction.py: added `_filter_ungrounded_extractions()` + `require_grounding` param | main |
+| 2026-02-18 | #375 | `c2bd1bb` | annotation.py: `suppress_parse_errors` at annotation level, merged with `require_grounding` from #327 | custom |
+| 2026-02-18 | #374 | `4028976` | resolver.py: Unicode normalization (CJK radicals), trailing commas, multiple fenced blocks | custom |
+| 2026-02-18 | #350 | Manual apply | tokenizer.py: CJK script separation via regex V1 set subtraction (`_CJK_SCRIPTS`, `_CJK_PATTERN`) | custom |
+| 2026-02-18 | #257 | Manual apply | retry_utils.py (new), annotation.py, extraction.py, gemini.py: transient error retry with exponential backoff + jitter | custom |
 
 ---
 
diff --git a/langextract/annotation.py b/langextract/annotation.py
@@ -36,6 +36,7 @@
 from langextract import progress
 from langextract import prompting
 from langextract import resolver as resolver_lib
+from langextract import retry_utils
 from langextract.core import base_model
 from langextract.core import data
 from langextract.core import exceptions
@@ -349,6 +350,11 @@ def annotate_documents(
       extraction_passes: int = 1,
       context_window_chars: int | None = None,
       show_progress: bool = True,
+      retry_transient_errors: bool = True,
+      max_retries: int = 3,
+      retry_initial_delay: float = 1.0,
+      retry_backoff_factor: float = 2.0,
+      retry_max_delay: float = 60.0,
       tokenizer: tokenizer_lib.Tokenizer | None = None,
       **kwargs,
   ) -> Iterator[data.AnnotatedDocument]:
@@ -376,6 +382,11 @@ def annotate_documents(
         include as context for the current chunk. Helps with coreference
         resolution across chunk boundaries. Defaults to None (disabled).
       show_progress: Whether to show progress bar. Defaults to True.
+      retry_transient_errors: Whether to retry on transient errors. Defaults to True.
+      max_retries: Maximum number of retry attempts. Defaults to 3.
+      retry_initial_delay: Initial delay before retry in seconds. Defaults to 1.0.
+      retry_backoff_factor: Backoff multiplier for retries. Defaults to 2.0.
+      retry_max_delay: Maximum delay between retries in seconds. Defaults to 60.0.
       tokenizer: Optional tokenizer to use. If None, uses default tokenizer.
       **kwargs: Additional arguments passed to LanguageModel.infer and Resolver.
 
@@ -396,6 +407,11 @@ def annotate_documents(
           batch_length,
           debug,
           show_progress,
+          retry_transient_errors=retry_transient_errors,
+          max_retries=max_retries,
+          retry_initial_delay=retry_initial_delay,
+          retry_backoff_factor=retry_backoff_factor,
+          retry_max_delay=retry_max_delay,
           context_window_chars=context_window_chars,
           tokenizer=tokenizer,
           **kwargs,
@@ -409,6 +425,11 @@ def annotate_documents(
           debug,
           extraction_passes,
           show_progress,
+          retry_transient_errors=retry_transient_errors,
+          max_retries=max_retries,
+          retry_initial_delay=retry_initial_delay,
+          retry_backoff_factor=retry_backoff_factor,
+          retry_max_delay=retry_max_delay,
           context_window_chars=context_window_chars,
           tokenizer=tokenizer,
           **kwargs,
@@ -422,6 +443,11 @@ def _annotate_documents_single_pass(
       batch_length: int,
       debug: bool,
       show_progress: bool = True,
+      retry_transient_errors: bool = True,
+      max_retries: int = 3,
+      retry_initial_delay: float = 1.0,
+      retry_backoff_factor: float = 2.0,
+      retry_max_delay: float = 60.0,
       context_window_chars: int | None = None,
       tokenizer: tokenizer_lib.Tokenizer | None = None,
       **kwargs,
@@ -434,6 +460,25 @@ def _annotate_documents_single_pass(
 
     When context_window_chars is set, includes text from the previous chunk as
     context for coreference resolution across chunk boundaries.
+
+    Args:
+      documents: Iterable of documents to annotate.
+      resolver: Resolver for processing inference results.
+      max_char_buffer: Maximum character buffer for chunking.
+      batch_length: Number of chunks to process in each batch.
+      debug: Whether to enable debug logging.
+      show_progress: Whether to show progress bar.
+      retry_transient_errors: Whether to retry on transient errors.
+      max_retries: Maximum number of retry attempts.
+      retry_initial_delay: Initial delay before retry.
+      retry_backoff_factor: Backoff multiplier for retries.
+      retry_max_delay: Maximum delay between retries.
+      context_window_chars: Characters from previous chunk for context.
+      tokenizer: Optional tokenizer to use.
+      **kwargs: Additional arguments passed to language model.
+
+    Yields:
+      AnnotatedDocument objects with extracted data.
     """
     doc_order: list[str] = []
     doc_text_by_id: dict[str, str] = {}
@@ -521,7 +566,17 @@ def _emit_docs_iter(
           except AttributeError:
             pass
 
-        outputs = self._language_model.infer(batch_prompts=prompts, **kwargs)
+        # Process batch with individual chunk retry capability
+        outputs = list(self._process_batch_with_retry(
+            batch_prompts=prompts,
+            batch=batch,
+            retry_transient_errors=retry_transient_errors,
+            max_retries=max_retries,
+            retry_initial_delay=retry_initial_delay,
+            retry_backoff_factor=retry_backoff_factor,
+            retry_max_delay=retry_max_delay,
+            **kwargs,
+        ))
         if not isinstance(outputs, list):
           outputs = list(outputs)
 
@@ -606,11 +661,37 @@ def _annotate_documents_sequential_passes(
       debug: bool,
       extraction_passes: int,
       show_progress: bool = True,
+      retry_transient_errors: bool = True,
+      max_retries: int = 3,
+      retry_initial_delay: float = 1.0,
+      retry_backoff_factor: float = 2.0,
+      retry_max_delay: float = 60.0,
       context_window_chars: int | None = None,
       tokenizer: tokenizer_lib.Tokenizer | None = None,
       **kwargs,
   ) -> Iterator[data.AnnotatedDocument]:
-    """Sequential extraction passes logic for improved recall."""
+    """Sequential extraction passes logic for improved recall.
+
+    Args:
+      documents: Iterable of documents to annotate.
+      resolver: Resolver for processing inference results.
+      max_char_buffer: Maximum character buffer for chunking.
+      batch_length: Number of chunks to process in each batch.
+      debug: Whether to enable debug logging.
+      extraction_passes: Number of extraction passes to perform.
+      show_progress: Whether to show progress bar.
+      retry_transient_errors: Whether to retry on transient errors.
+      max_retries: Maximum number of retry attempts.
+      retry_initial_delay: Initial delay before retry.
+      retry_backoff_factor: Backoff multiplier for retries.
+      retry_max_delay: Maximum delay between retries.
+      context_window_chars: Characters from previous chunk for context.
+      tokenizer: Optional tokenizer to use.
+      **kwargs: Additional arguments passed to language model.
+
+    Yields:
+      AnnotatedDocument objects with merged extracted data.
+    """
 
     logging.info(
         "Starting sequential extraction passes for improved recall with %d"
@@ -639,6 +720,11 @@ def _annotate_documents_sequential_passes(
           batch_length,
           debug=(debug and pass_num == 0),
           show_progress=show_progress if pass_num == 0 else False,
+          retry_transient_errors=retry_transient_errors,
+          max_retries=max_retries,
+          retry_initial_delay=retry_initial_delay,
+          retry_backoff_factor=retry_backoff_factor,
+          retry_max_delay=retry_max_delay,
           context_window_chars=context_window_chars,
           tokenizer=tokenizer,
           **kwargs,
@@ -693,6 +779,11 @@ def annotate_text(
       extraction_passes: int = 1,
       context_window_chars: int | None = None,
       show_progress: bool = True,
+      retry_transient_errors: bool = True,
+      max_retries: int = 3,
+      retry_initial_delay: float = 1.0,
+      retry_backoff_factor: float = 2.0,
+      retry_max_delay: float = 60.0,
       tokenizer: tokenizer_lib.Tokenizer | None = None,
       **kwargs,
   ) -> data.AnnotatedDocument:
@@ -714,6 +805,11 @@ def annotate_text(
         include as context for coreference resolution. Defaults to None
         (disabled).
       show_progress: Whether to show progress bar. Defaults to True.
+      retry_transient_errors: Whether to retry on transient errors. Defaults to True.
+      max_retries: Maximum number of retry attempts. Defaults to 3.
+      retry_initial_delay: Initial delay before retry in seconds. Defaults to 1.0.
+      retry_backoff_factor: Backoff multiplier for retries. Defaults to 2.0.
+      retry_max_delay: Maximum delay between retries in seconds. Defaults to 60.0.
       tokenizer: Optional tokenizer instance.
       **kwargs: Additional arguments for inference and resolver_lib.
 
@@ -745,6 +841,11 @@ def annotate_text(
             extraction_passes=extraction_passes,
             context_window_chars=context_window_chars,
             show_progress=show_progress,
+            retry_transient_errors=retry_transient_errors,
+            max_retries=max_retries,
+            retry_initial_delay=retry_initial_delay,
+            retry_backoff_factor=retry_backoff_factor,
+            retry_max_delay=retry_max_delay,
             tokenizer=tokenizer,
             **kwargs,
         )
diff --git a/langextract/extraction.py b/langextract/extraction.py
@@ -94,6 +94,11 @@ def extract(
     prompt_validation_level: pv.PromptValidationLevel = pv.PromptValidationLevel.WARNING,
     prompt_validation_strict: bool = False,
     show_progress: bool = True,
+    retry_transient_errors: bool = True,
+    max_retries: int = 3,
+    retry_initial_delay: float = 1.0,
+    retry_backoff_factor: float = 2.0,
+    retry_max_delay: float = 60.0,
     tokenizer: tokenizer_lib.Tokenizer | None = None,
     require_grounding: bool = False,
 ) -> list[data.AnnotatedDocument] | data.AnnotatedDocument:
@@ -197,6 +202,12 @@ def extract(
         prompt_validation_strict: When True and prompt_validation_level is ERROR,
           raises on non-exact matches (MATCH_FUZZY, MATCH_LESSER). Defaults to False.
         show_progress: Whether to show progress bar during extraction. Defaults to True.
+        retry_transient_errors: Whether to automatically retry on transient errors
+          like 503 "model overloaded". Defaults to True.
+        max_retries: Maximum number of retry attempts for transient errors. Defaults to 3.
+        retry_initial_delay: Initial delay in seconds before first retry. Defaults to 1.0.
+        retry_backoff_factor: Multiplier for exponential backoff between retries. Defaults to 2.0.
+        retry_max_delay: Maximum delay between retries in seconds. Defaults to 60.0.
         require_grounding: Whether to filter out extractions that cannot be
           grounded to specific character positions in the source text. When True,
           only extractions with valid char_interval (non-None start_pos and
@@ -370,6 +381,16 @@ def extract(
         format_handler=format_handler,
     )
 
+    # Add retry parameters to alignment kwargs
+    retry_kwargs = {
+        "retry_transient_errors": retry_transient_errors,
+        "max_retries": max_retries,
+        "retry_initial_delay": retry_initial_delay,
+        "retry_backoff_factor": retry_backoff_factor,
+        "retry_max_delay": retry_max_delay,
+    }
+    alignment_kwargs.update(retry_kwargs)
+
     if isinstance(text_or_documents, str):
         result = annotator.annotate_text(
             text=text_or_documents,
diff --git a/langextract/providers/gemini.py b/langextract/providers/gemini.py
@@ -200,6 +200,7 @@ def _validate_schema_config(self) -> None:
           'Set format_type=JSON or use_schema_constraints=False.'
       )
 
+  @retry_utils.retry_chunk_processing()
   def _process_single_prompt(
       self, prompt: str, config: dict
   ) -> core_types.ScoredOutput:

| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | | `@@ -200,6 +200,7 @@ def _validate_schema_config(self) -> None:` |
| `200` | `200` | `'Set format_type=JSON or use_schema_constraints=False.'` |
| `201` | `201` | `)` |
| `202` | `202` | |
| | `203` | `+ @retry_utils.retry_chunk_processing()` |
| `203` | `204` | `def _process_single_prompt(` |
| `204` | `205` | `self, prompt: str, config: dict` |
| `205` | `206` | `) -> core_types.ScoredOutput:` |