Skip to content

Commit 632d197

Browse files
committed
refactor: integrate retry decorator settings into DependencyContainer and LangchainSummarizer
1 parent bde7a8c commit 632d197

3 files changed

Lines changed: 145 additions & 28 deletions

File tree

libs/admin-api-lib/src/admin_api_lib/dependency_container.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
from rag_core_lib.impl.settings.langfuse_settings import LangfuseSettings
6565
from rag_core_lib.impl.settings.ollama_llm_settings import OllamaSettings
6666
from rag_core_lib.impl.settings.rag_class_types_settings import RAGClassTypeSettings
67+
from rag_core_lib.impl.settings.retry_decorator_settings import RetryDecoratorSettings
6768
from rag_core_lib.impl.settings.stackit_vllm_settings import StackitVllmSettings
6869
from rag_core_lib.impl.tracers.langfuse_traced_chain import LangfuseTracedGraph
6970
from rag_core_lib.impl.utils.async_threadsafe_semaphore import AsyncThreadsafeSemaphore
@@ -86,6 +87,7 @@ class DependencyContainer(DeclarativeContainer):
8687
key_value_store_settings = KeyValueSettings()
8788
summarizer_settings = SummarizerSettings()
8889
source_uploader_settings = SourceUploaderSettings()
90+
retry_decorator_settings = RetryDecoratorSettings()
8991

9092
key_value_store = Singleton(FileStatusKeyValueStore, key_value_store_settings)
9193
file_service = Singleton(S3Service, s3_settings=s3_settings)
@@ -136,7 +138,9 @@ class DependencyContainer(DeclarativeContainer):
136138
LangchainSummarizer,
137139
langfuse_manager=langfuse_manager,
138140
chunker=summary_text_splitter,
139-
semaphore=Singleton(AsyncThreadsafeSemaphore, summarizer_settings.maximum_concurrreny),
141+
semaphore=Singleton(AsyncThreadsafeSemaphore, summarizer_settings.maximum_concurrency),
142+
summarizer_settings=summarizer_settings,
143+
retry_decorator_settings=retry_decorator_settings
140144
)
141145

142146
summary_enhancer = List(

libs/admin-api-lib/src/admin_api_lib/impl/settings/summarizer_settings.py

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Contains settings for summarizer."""
22

3-
from pydantic import Field
3+
from typing import Optional
4+
from pydantic import Field, PositiveInt
45
from pydantic_settings import BaseSettings
56

67

@@ -12,8 +13,22 @@ class SummarizerSettings(BaseSettings):
1213
----------
1314
maximum_input_size : int
1415
The maximum size of the input that the summarizer can handle. Default is 8000.
15-
maximum_concurrreny : int
16+
maximum_concurrency : int
1617
The maximum number of concurrent summarization processes. Default is 10.
18+
max_retries : Optional[PositiveInt]
19+
Total retries, not counting the initial attempt.
20+
retry_base_delay : Optional[float]
21+
Base delay in seconds for the first retry.
22+
retry_max_delay : Optional[float]
23+
Maximum delay cap in seconds for any single wait.
24+
backoff_factor : Optional[float]
25+
Exponential backoff factor (>= 1).
26+
attempt_cap : Optional[int]
27+
Cap for exponent growth (backoff_factor ** attempt_cap).
28+
jitter_min : Optional[float]
29+
Minimum jitter in seconds.
30+
jitter_max : Optional[float]
31+
Maximum jitter in seconds.
1732
"""
1833

1934
class Config:
@@ -23,4 +38,45 @@ class Config:
2338
case_sensitive = False
2439

2540
maximum_input_size: int = Field(default=8000)
26-
maximum_concurrreny: int = Field(default=10)
41+
maximum_concurrency: int = Field(default=10)
42+
max_retries: Optional[PositiveInt] = Field(
43+
default=None,
44+
title="Max Retries",
45+
description="Total retries, not counting the initial attempt.",
46+
)
47+
retry_base_delay: Optional[float] = Field(
48+
default=None,
49+
ge=0,
50+
title="Retry Base Delay",
51+
description="Base delay in seconds for the first retry.",
52+
)
53+
retry_max_delay: Optional[float] = Field(
54+
default=None,
55+
gt=0,
56+
title="Retry Max Delay",
57+
description="Maximum delay cap in seconds for any single wait.",
58+
)
59+
backoff_factor: Optional[float] = Field(
60+
default=None,
61+
ge=1.0,
62+
title="Backoff Factor",
63+
description="Exponential backoff factor (>= 1).",
64+
)
65+
attempt_cap: Optional[int] = Field(
66+
default=None,
67+
ge=0,
68+
title="Attempt Cap",
69+
description="Cap for exponent growth (backoff_factor ** attempt_cap).",
70+
)
71+
jitter_min: Optional[float] = Field(
72+
default=None,
73+
ge=0.0,
74+
title="Jitter Min (s)",
75+
description="Minimum jitter in seconds.",
76+
)
77+
jitter_max: Optional[float] = Field(
78+
default=None,
79+
ge=0.0,
80+
title="Jitter Max (s)",
81+
description="Maximum jitter in seconds.",
82+
)

libs/admin-api-lib/src/admin_api_lib/impl/summarizer/langchain_summarizer.py

Lines changed: 81 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
11
"""Module for the LangchainSummarizer class."""
22

3+
import asyncio
34
import logging
4-
import traceback
55
from typing import Optional
66

77
from langchain.text_splitter import RecursiveCharacterTextSplitter
88
from langchain_core.documents import Document
99
from langchain_core.runnables import Runnable, RunnableConfig, ensure_config
10+
from openai import APIConnectionError, APIError, APITimeoutError, RateLimitError
1011

12+
from admin_api_lib.impl.settings.summarizer_settings import SummarizerSettings
1113
from admin_api_lib.summarizer.summarizer import (
1214
Summarizer,
1315
SummarizerInput,
1416
SummarizerOutput,
1517
)
1618
from rag_core_lib.impl.langfuse_manager.langfuse_manager import LangfuseManager
19+
from rag_core_lib.impl.settings.retry_decorator_settings import RetryDecoratorSettings
1720
from rag_core_lib.impl.utils.async_threadsafe_semaphore import AsyncThreadsafeSemaphore
21+
from rag_core_lib.impl.utils.retry_decorator import retry_with_backoff
1822

1923
logger = logging.getLogger(__name__)
2024

@@ -32,10 +36,15 @@ def __init__(
3236
langfuse_manager: LangfuseManager,
3337
chunker: RecursiveCharacterTextSplitter,
3438
semaphore: AsyncThreadsafeSemaphore,
39+
summarizer_settings: SummarizerSettings,
40+
retry_decorator_settings: RetryDecoratorSettings,
3541
):
3642
self._chunker = chunker
3743
self._langfuse_manager = langfuse_manager
3844
self._semaphore = semaphore
45+
self._retry_decorator_settings = self._create_retry_decorator_settings(
46+
summarizer_settings, retry_decorator_settings
47+
)
3948

4049
async def ainvoke(self, query: SummarizerInput, config: Optional[RunnableConfig] = None) -> SummarizerOutput:
4150
"""
@@ -65,40 +74,88 @@ async def ainvoke(self, query: SummarizerInput, config: Optional[RunnableConfig]
6574
"""
6675
assert query, "Query is empty: %s" % query # noqa S101
6776
config = ensure_config(config)
68-
tries_remaining = config.get("configurable", {}).get("tries_remaining", 3)
69-
logger.debug("Tries remaining %d" % tries_remaining)
7077

71-
if tries_remaining < 0:
72-
raise Exception("Summary creation failed.")
7378
document = Document(page_content=query)
7479
langchain_documents = self._chunker.split_documents([document])
80+
logger.debug("Summarizing %d chunk(s)...", len(langchain_documents))
7581

76-
outputs = []
77-
for langchain_document in langchain_documents:
78-
async with self._semaphore:
79-
try:
80-
result = await self._create_chain().ainvoke({"text": langchain_document.page_content}, config)
81-
# Extract content from AIMessage if it's not already a string
82-
content = result.content if hasattr(result, "content") else str(result)
83-
outputs.append(content)
84-
except Exception as e:
85-
logger.error("Error in summarizing langchain doc: %s %s", e, traceback.format_exc())
86-
config["tries_remaining"] = tries_remaining - 1
87-
result = await self._create_chain().ainvoke({"text": langchain_document.page_content}, config)
88-
# Extract content from AIMessage if it's not already a string
89-
content = result.content if hasattr(result, "content") else str(result)
90-
outputs.append(content)
82+
# Fan out one task per chunk; concurrency is bounded by the semaphore acquired inside _summarize_chunk
83+
tasks = [asyncio.create_task(self._summarize_chunk(doc.page_content, config)) for doc in langchain_documents]
84+
outputs = await asyncio.gather(*tasks)
9185

9286
if len(outputs) == 1:
9387
return outputs[0]
94-
summary = " ".join(outputs)
88+
89+
# Single reduce pass over the joined chunk summaries (runs whenever there is more than one chunk; no recursion)
90+
merged = " ".join(outputs)
9591
logger.debug(
96-
"Reduced number of chars from %d to %d"
97-
% (len("".join([x.page_content for x in langchain_documents])), len(summary))
92+
"Reduced number of chars from %d to %d",
93+
len("".join([x.page_content for x in langchain_documents])),
94+
len(merged),
95+
)
96+
return await self._summarize_chunk(merged, config)
97+
98+
def _create_retry_decorator_settings(
99+
self, summarizer_settings: SummarizerSettings, retry_decorator_settings: RetryDecoratorSettings
100+
):
101+
return RetryDecoratorSettings(
102+
max_retries=(
103+
summarizer_settings.max_retries
104+
if summarizer_settings.max_retries is not None
105+
else retry_decorator_settings.max_retries
106+
),
107+
retry_base_delay=(
108+
summarizer_settings.retry_base_delay
109+
if summarizer_settings.retry_base_delay is not None
110+
else retry_decorator_settings.retry_base_delay
111+
),
112+
retry_max_delay=(
113+
summarizer_settings.retry_max_delay
114+
if summarizer_settings.retry_max_delay is not None
115+
else retry_decorator_settings.retry_max_delay
116+
),
117+
backoff_factor=(
118+
summarizer_settings.backoff_factor
119+
if summarizer_settings.backoff_factor is not None
120+
else retry_decorator_settings.backoff_factor
121+
),
122+
attempt_cap=(
123+
summarizer_settings.attempt_cap
124+
if summarizer_settings.attempt_cap is not None
125+
else retry_decorator_settings.attempt_cap
126+
),
127+
jitter_min=(
128+
summarizer_settings.jitter_min
129+
if summarizer_settings.jitter_min is not None
130+
else retry_decorator_settings.jitter_min
131+
),
132+
jitter_max=(
133+
summarizer_settings.jitter_max
134+
if summarizer_settings.jitter_max is not None
135+
else retry_decorator_settings.jitter_max
136+
),
98137
)
99-
return await self.ainvoke(summary, config)
100138

101139
def _create_chain(self) -> Runnable:
102140
return self._langfuse_manager.get_base_prompt(self.__class__.__name__) | self._langfuse_manager.get_base_llm(
103141
self.__class__.__name__
104142
)
143+
144+
def _retry_with_backoff_wrapper(self):
145+
# self._retry_decorator_settings was merged in __init__: summarizer-specific overrides win, global retry settings are the fallback
146+
return retry_with_backoff(
147+
settings=self._retry_decorator_settings,
148+
exceptions=(APIError, RateLimitError, APITimeoutError, APIConnectionError),
149+
rate_limit_exceptions=(RateLimitError,),
150+
logger=logger,
151+
)
152+
153+
async def _summarize_chunk(self, text: str, config: Optional[RunnableConfig]) -> SummarizerOutput:
154+
@self._retry_with_backoff_wrapper()
155+
async def _call(text: str, config: Optional[RunnableConfig]) -> SummarizerOutput:
156+
response = await self._create_chain().ainvoke({"text": text}, config)
157+
return response.content if hasattr(response, "content") else str(response)
158+
159+
# Hold the semaphore for the entire retry lifecycle
160+
async with self._semaphore:
161+
return await _call(text, config)

0 commit comments

Comments
 (0)