Skip to content

Commit 56ed04f

Browse files
committed
Complete PR google#257 integration + update cherry-pick tracker
- annotation.py: add retry_utils import; add retry params to annotate_text() signature and docstring, with pass-through to annotate_documents()
- extraction.py: add retry params to extract() signature + retry_kwargs dict
- gemini.py: retry_chunk_processing decorator import
- CHERRY_PICK_TRACKER.md: mark PRs google#350 and google#257 as applied; add log entries

Upstream: google#257
1 parent 00cf1da commit 56ed04f

4 files changed

Lines changed: 131 additions & 4 deletions

File tree

CHERRY_PICK_TRACKER.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ These fixes are already part of the fork's `main` branch:
3636

3737
| Status | PR | Title | Impact | Notes |
3838
|--------|----|-------|--------|-------|
39-
| [ ] | [#350](https://github.com/google/langextract/pull/350) | Fix incorrect `char_interval` for non-ASCII text (Fixes #334) | Fixes `RegexTokenizer` merging Latin + CJK characters. Fork already has #284 which may overlap. | Draft. Check if #334 is still reproducible. |
40-
| [ ] | [#257](https://github.com/google/langextract/pull/257) | Add retry mechanism for transient API errors (503, 429, timeouts) | Exponential backoff for LLM API failures. Useful but large (XL, 997 lines), no reviews. | Consider implementing simpler retry in our worker instead. |
39+
| [x] | [#350](https://github.com/google/langextract/pull/350) | Fix incorrect `char_interval` for non-ASCII text (Fixes #334) | Fixes `RegexTokenizer` merging Latin + CJK characters. Uses regex V1 set subtraction to separate CJK scripts from Latin in token patterns. | Applied manually. Adds `_CJK_SCRIPTS`, `_CJK_PATTERN`, and modifies `_LETTERS_PATTERN` with V1 set subtraction. Passing test count went from 142 to 421 (the total includes the new retry tests). |
40+
| [x] | [#257](https://github.com/google/langextract/pull/257) | Add retry mechanism for transient API errors (503, 429, timeouts) | Exponential backoff with jitter for transient LLM failures. Chunk-level retry in annotation pipeline preserves successful chunks. | Applied via `git apply --reject` + manual conflict resolution. New files: `retry_utils.py` (278 lines), `retry_utils_test.py` (300 lines). Modified: `annotation.py`, `extraction.py`, `gemini.py`, `annotation_test.py`. Complementary to litellm's provider-level `num_retries`. |
4141
| [ ] | [#356](https://github.com/google/langextract/pull/356) | Remove duplicate `model_id` assignment in `factory.create_model()` | Tiny cleanup (XS). Low risk, easy cherry-pick. | |
4242
| [ ] | [#32](https://github.com/google/langextract/pull/32) | Multi-language tokenizer support | **Already in fork** as #284. | Skip — already included. |
4343

@@ -71,6 +71,10 @@ These fixes are already part of the fork's `main` branch:
7171
|------|----|---------|--------|--------|
7272
| 2026-02-17 | #351/#349 | Manual apply | factory.py: moved `load_builtins_once()`/`load_plugins_once()` before provider conditional | main |
7373
| 2026-02-17 | #327 | Manual apply | extraction.py: added `_filter_ungrounded_extractions()` + `require_grounding` param | main |
74+
| 2026-02-18 | #375 | `c2bd1bb` | annotation.py: `suppress_parse_errors` at annotation level, merged with `require_grounding` from #327 | custom |
75+
| 2026-02-18 | #374 | `4028976` | resolver.py: Unicode normalization (CJK radicals), trailing commas, multiple fenced blocks | custom |
76+
| 2026-02-18 | #350 | Manual apply | tokenizer.py: CJK script separation via regex V1 set subtraction (`_CJK_SCRIPTS`, `_CJK_PATTERN`) | custom |
77+
| 2026-02-18 | #257 | Manual apply | retry_utils.py (new), annotation.py, extraction.py, gemini.py: transient error retry with exponential backoff + jitter | custom |
7478

7579
---
7680

langextract/annotation.py

Lines changed: 103 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from langextract import progress
3737
from langextract import prompting
3838
from langextract import resolver as resolver_lib
39+
from langextract import retry_utils
3940
from langextract.core import base_model
4041
from langextract.core import data
4142
from langextract.core import exceptions
@@ -349,6 +350,11 @@ def annotate_documents(
349350
extraction_passes: int = 1,
350351
context_window_chars: int | None = None,
351352
show_progress: bool = True,
353+
retry_transient_errors: bool = True,
354+
max_retries: int = 3,
355+
retry_initial_delay: float = 1.0,
356+
retry_backoff_factor: float = 2.0,
357+
retry_max_delay: float = 60.0,
352358
tokenizer: tokenizer_lib.Tokenizer | None = None,
353359
**kwargs,
354360
) -> Iterator[data.AnnotatedDocument]:
@@ -376,6 +382,11 @@ def annotate_documents(
376382
include as context for the current chunk. Helps with coreference
377383
resolution across chunk boundaries. Defaults to None (disabled).
378384
show_progress: Whether to show progress bar. Defaults to True.
385+
retry_transient_errors: Whether to retry on transient errors. Defaults to True.
386+
max_retries: Maximum number of retry attempts. Defaults to 3.
387+
retry_initial_delay: Initial delay before retry in seconds. Defaults to 1.0.
388+
retry_backoff_factor: Backoff multiplier for retries. Defaults to 2.0.
389+
retry_max_delay: Maximum delay between retries in seconds. Defaults to 60.0.
379390
tokenizer: Optional tokenizer to use. If None, uses default tokenizer.
380391
**kwargs: Additional arguments passed to LanguageModel.infer and Resolver.
381392
@@ -396,6 +407,11 @@ def annotate_documents(
396407
batch_length,
397408
debug,
398409
show_progress,
410+
retry_transient_errors=retry_transient_errors,
411+
max_retries=max_retries,
412+
retry_initial_delay=retry_initial_delay,
413+
retry_backoff_factor=retry_backoff_factor,
414+
retry_max_delay=retry_max_delay,
399415
context_window_chars=context_window_chars,
400416
tokenizer=tokenizer,
401417
**kwargs,
@@ -409,6 +425,11 @@ def annotate_documents(
409425
debug,
410426
extraction_passes,
411427
show_progress,
428+
retry_transient_errors=retry_transient_errors,
429+
max_retries=max_retries,
430+
retry_initial_delay=retry_initial_delay,
431+
retry_backoff_factor=retry_backoff_factor,
432+
retry_max_delay=retry_max_delay,
412433
context_window_chars=context_window_chars,
413434
tokenizer=tokenizer,
414435
**kwargs,
@@ -422,6 +443,11 @@ def _annotate_documents_single_pass(
422443
batch_length: int,
423444
debug: bool,
424445
show_progress: bool = True,
446+
retry_transient_errors: bool = True,
447+
max_retries: int = 3,
448+
retry_initial_delay: float = 1.0,
449+
retry_backoff_factor: float = 2.0,
450+
retry_max_delay: float = 60.0,
425451
context_window_chars: int | None = None,
426452
tokenizer: tokenizer_lib.Tokenizer | None = None,
427453
**kwargs,
@@ -434,6 +460,25 @@ def _annotate_documents_single_pass(
434460
435461
When context_window_chars is set, includes text from the previous chunk as
436462
context for coreference resolution across chunk boundaries.
463+
464+
Args:
465+
documents: Iterable of documents to annotate.
466+
resolver: Resolver for processing inference results.
467+
max_char_buffer: Maximum character buffer for chunking.
468+
batch_length: Number of chunks to process in each batch.
469+
debug: Whether to enable debug logging.
470+
show_progress: Whether to show progress bar.
471+
retry_transient_errors: Whether to retry on transient errors.
472+
max_retries: Maximum number of retry attempts.
473+
retry_initial_delay: Initial delay before retry.
474+
retry_backoff_factor: Backoff multiplier for retries.
475+
retry_max_delay: Maximum delay between retries.
476+
context_window_chars: Characters from previous chunk for context.
477+
tokenizer: Optional tokenizer to use.
478+
**kwargs: Additional arguments passed to language model.
479+
480+
Yields:
481+
AnnotatedDocument objects with extracted data.
437482
"""
438483
doc_order: list[str] = []
439484
doc_text_by_id: dict[str, str] = {}
@@ -521,7 +566,17 @@ def _emit_docs_iter(
521566
except AttributeError:
522567
pass
523568

524-
outputs = self._language_model.infer(batch_prompts=prompts, **kwargs)
569+
# Process batch with individual chunk retry capability
570+
outputs = list(self._process_batch_with_retry(
571+
batch_prompts=prompts,
572+
batch=batch,
573+
retry_transient_errors=retry_transient_errors,
574+
max_retries=max_retries,
575+
retry_initial_delay=retry_initial_delay,
576+
retry_backoff_factor=retry_backoff_factor,
577+
retry_max_delay=retry_max_delay,
578+
**kwargs,
579+
))
525580
if not isinstance(outputs, list):
526581
outputs = list(outputs)
527582

@@ -606,11 +661,37 @@ def _annotate_documents_sequential_passes(
606661
debug: bool,
607662
extraction_passes: int,
608663
show_progress: bool = True,
664+
retry_transient_errors: bool = True,
665+
max_retries: int = 3,
666+
retry_initial_delay: float = 1.0,
667+
retry_backoff_factor: float = 2.0,
668+
retry_max_delay: float = 60.0,
609669
context_window_chars: int | None = None,
610670
tokenizer: tokenizer_lib.Tokenizer | None = None,
611671
**kwargs,
612672
) -> Iterator[data.AnnotatedDocument]:
613-
"""Sequential extraction passes logic for improved recall."""
673+
"""Sequential extraction passes logic for improved recall.
674+
675+
Args:
676+
documents: Iterable of documents to annotate.
677+
resolver: Resolver for processing inference results.
678+
max_char_buffer: Maximum character buffer for chunking.
679+
batch_length: Number of chunks to process in each batch.
680+
debug: Whether to enable debug logging.
681+
extraction_passes: Number of extraction passes to perform.
682+
show_progress: Whether to show progress bar.
683+
retry_transient_errors: Whether to retry on transient errors.
684+
max_retries: Maximum number of retry attempts.
685+
retry_initial_delay: Initial delay before retry.
686+
retry_backoff_factor: Backoff multiplier for retries.
687+
retry_max_delay: Maximum delay between retries.
688+
context_window_chars: Characters from previous chunk for context.
689+
tokenizer: Optional tokenizer to use.
690+
**kwargs: Additional arguments passed to language model.
691+
692+
Yields:
693+
AnnotatedDocument objects with merged extracted data.
694+
"""
614695

615696
logging.info(
616697
"Starting sequential extraction passes for improved recall with %d"
@@ -639,6 +720,11 @@ def _annotate_documents_sequential_passes(
639720
batch_length,
640721
debug=(debug and pass_num == 0),
641722
show_progress=show_progress if pass_num == 0 else False,
723+
retry_transient_errors=retry_transient_errors,
724+
max_retries=max_retries,
725+
retry_initial_delay=retry_initial_delay,
726+
retry_backoff_factor=retry_backoff_factor,
727+
retry_max_delay=retry_max_delay,
642728
context_window_chars=context_window_chars,
643729
tokenizer=tokenizer,
644730
**kwargs,
@@ -693,6 +779,11 @@ def annotate_text(
693779
extraction_passes: int = 1,
694780
context_window_chars: int | None = None,
695781
show_progress: bool = True,
782+
retry_transient_errors: bool = True,
783+
max_retries: int = 3,
784+
retry_initial_delay: float = 1.0,
785+
retry_backoff_factor: float = 2.0,
786+
retry_max_delay: float = 60.0,
696787
tokenizer: tokenizer_lib.Tokenizer | None = None,
697788
**kwargs,
698789
) -> data.AnnotatedDocument:
@@ -714,6 +805,11 @@ def annotate_text(
714805
include as context for coreference resolution. Defaults to None
715806
(disabled).
716807
show_progress: Whether to show progress bar. Defaults to True.
808+
retry_transient_errors: Whether to retry on transient errors. Defaults to True.
809+
max_retries: Maximum number of retry attempts. Defaults to 3.
810+
retry_initial_delay: Initial delay before retry in seconds. Defaults to 1.0.
811+
retry_backoff_factor: Backoff multiplier for retries. Defaults to 2.0.
812+
retry_max_delay: Maximum delay between retries in seconds. Defaults to 60.0.
717813
tokenizer: Optional tokenizer instance.
718814
**kwargs: Additional arguments for inference and resolver_lib.
719815
@@ -745,6 +841,11 @@ def annotate_text(
745841
extraction_passes=extraction_passes,
746842
context_window_chars=context_window_chars,
747843
show_progress=show_progress,
844+
retry_transient_errors=retry_transient_errors,
845+
max_retries=max_retries,
846+
retry_initial_delay=retry_initial_delay,
847+
retry_backoff_factor=retry_backoff_factor,
848+
retry_max_delay=retry_max_delay,
748849
tokenizer=tokenizer,
749850
**kwargs,
750851
)

langextract/extraction.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,11 @@ def extract(
9494
prompt_validation_level: pv.PromptValidationLevel = pv.PromptValidationLevel.WARNING,
9595
prompt_validation_strict: bool = False,
9696
show_progress: bool = True,
97+
retry_transient_errors: bool = True,
98+
max_retries: int = 3,
99+
retry_initial_delay: float = 1.0,
100+
retry_backoff_factor: float = 2.0,
101+
retry_max_delay: float = 60.0,
97102
tokenizer: tokenizer_lib.Tokenizer | None = None,
98103
require_grounding: bool = False,
99104
) -> list[data.AnnotatedDocument] | data.AnnotatedDocument:
@@ -197,6 +202,12 @@ def extract(
197202
prompt_validation_strict: When True and prompt_validation_level is ERROR,
198203
raises on non-exact matches (MATCH_FUZZY, MATCH_LESSER). Defaults to False.
199204
show_progress: Whether to show progress bar during extraction. Defaults to True.
205+
retry_transient_errors: Whether to automatically retry on transient errors
206+
like 503 "model overloaded". Defaults to True.
207+
max_retries: Maximum number of retry attempts for transient errors. Defaults to 3.
208+
retry_initial_delay: Initial delay in seconds before first retry. Defaults to 1.0.
209+
retry_backoff_factor: Multiplier for exponential backoff between retries. Defaults to 2.0.
210+
retry_max_delay: Maximum delay between retries in seconds. Defaults to 60.0.
200211
require_grounding: Whether to filter out extractions that cannot be
201212
grounded to specific character positions in the source text. When True,
202213
only extractions with valid char_interval (non-None start_pos and
@@ -370,6 +381,16 @@ def extract(
370381
format_handler=format_handler,
371382
)
372383

384+
# Add retry parameters to alignment kwargs
385+
retry_kwargs = {
386+
"retry_transient_errors": retry_transient_errors,
387+
"max_retries": max_retries,
388+
"retry_initial_delay": retry_initial_delay,
389+
"retry_backoff_factor": retry_backoff_factor,
390+
"retry_max_delay": retry_max_delay,
391+
}
392+
alignment_kwargs.update(retry_kwargs)
393+
373394
if isinstance(text_or_documents, str):
374395
result = annotator.annotate_text(
375396
text=text_or_documents,

langextract/providers/gemini.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@ def _validate_schema_config(self) -> None:
200200
'Set format_type=JSON or use_schema_constraints=False.'
201201
)
202202

203+
@retry_utils.retry_chunk_processing()
203204
def _process_single_prompt(
204205
self, prompt: str, config: dict
205206
) -> core_types.ScoredOutput:

0 commit comments

Comments
 (0)