Skip to content

Commit 7508746

Browse files
committed
refactor(rlsapi): inline single-use helpers and fix cache size
- Inline _get_cla_version, _get_configured_default_model_name, _is_verbose_enabled, and _get_prompt_template at their sole call sites
- Reduce _compile_prompt_template lru_cache maxsize from 8 to 1 since only one system prompt exists at a time

Signed-off-by: Major Hayden <major@redhat.com>
1 parent 890a6f7 commit 7508746

1 file changed

Lines changed: 17 additions & 54 deletions

File tree

src/app/endpoints/rlsapi_v1.py

Lines changed: 17 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -112,17 +112,23 @@ def _build_instructions(systeminfo: RlsapiV1SystemInfo) -> str:
112112
Returns:
113113
The rendered instructions string for the LLM.
114114
"""
115+
prompt = (
116+
configuration.customization.system_prompt
117+
if configuration.customization is not None
118+
and configuration.customization.system_prompt is not None
119+
else constants.DEFAULT_SYSTEM_PROMPT
120+
)
115121
date_today = datetime.now(tz=UTC).strftime("%B %d, %Y")
116122

117-
return _get_prompt_template().render(
123+
return _compile_prompt_template(prompt).render(
118124
date=date_today,
119125
os=systeminfo.os or "",
120126
version=systeminfo.version or "",
121127
arch=systeminfo.arch or "",
122128
)
123129

124130

125-
@functools.lru_cache(maxsize=8)
131+
@functools.lru_cache(maxsize=1)
126132
def _compile_prompt_template(prompt: str) -> jinja2.Template:
127133
"""Compile a Jinja2 template string inside a SandboxedEnvironment.
128134
@@ -147,25 +153,6 @@ def _compile_prompt_template(prompt: str) -> jinja2.Template:
147153
) from exc
148154

149155

150-
def _get_prompt_template() -> jinja2.Template:
151-
"""Resolve the system prompt from configuration and return the compiled template.
152-
153-
Delegates to the cached ``_compile_prompt_template`` so that identical
154-
prompt text is compiled only once, while configuration changes are
155-
picked up automatically.
156-
157-
Returns:
158-
The compiled Jinja2 Template ready for rendering.
159-
"""
160-
prompt = (
161-
configuration.customization.system_prompt
162-
if configuration.customization is not None
163-
and configuration.customization.system_prompt is not None
164-
else constants.DEFAULT_SYSTEM_PROMPT
165-
)
166-
return _compile_prompt_template(prompt)
167-
168-
169156
async def _get_default_model_id() -> str:
170157
"""Get the default model ID from configuration or auto-discovery.
171158
@@ -305,18 +292,6 @@ async def _call_llm(
305292
return cast(OpenAIResponseObject, response)
306293

307294

308-
def _get_cla_version(request: Request) -> str:
309-
"""Extract CLA version from User-Agent header."""
310-
return request.headers.get("User-Agent", "")
311-
312-
313-
def _get_configured_default_model_name() -> str:
314-
"""Get configured default model name for telemetry payloads."""
315-
if configuration.inference is None:
316-
return ""
317-
return configuration.inference.default_model or ""
318-
319-
320295
def _queue_splunk_event( # pylint: disable=too-many-arguments,too-many-positional-arguments
321296
background_tasks: BackgroundTasks,
322297
infer_request: RlsapiV1InferRequest,
@@ -348,11 +323,15 @@ def _queue_splunk_event( # pylint: disable=too-many-arguments,too-many-position
348323
question=infer_request.question,
349324
response=response_text,
350325
inference_time=inference_time,
351-
model=_get_configured_default_model_name(),
326+
model=(
327+
(configuration.inference.default_model or "")
328+
if configuration.inference is not None
329+
else ""
330+
),
352331
org_id=org_id,
353332
system_id=system_id,
354333
request_id=request_id,
355-
cla_version=_get_cla_version(request),
334+
cla_version=request.headers.get("User-Agent", ""),
356335
system_os=systeminfo.os,
357336
system_version=systeminfo.version,
358337
system_arch=systeminfo.arch,
@@ -475,24 +454,6 @@ def _record_inference_failure( # pylint: disable=too-many-arguments,too-many-po
475454
return inference_time
476455

477456

478-
def _is_verbose_enabled(infer_request: RlsapiV1InferRequest) -> bool:
479-
"""Check whether verbose metadata should be included in the response.
480-
481-
Verbose mode requires dual opt-in: the server configuration must allow it
482-
via ``allow_verbose_infer``, and the client must request it via the
483-
``include_metadata`` field.
484-
485-
Args:
486-
infer_request: The inference request to check.
487-
488-
Returns:
489-
True if both server config and client request enable verbose mode.
490-
"""
491-
return (
492-
configuration.rlsapi_v1.allow_verbose_infer and infer_request.include_metadata
493-
)
494-
495-
496457
def _resolve_quota_subject(request: Request, auth: AuthTuple) -> Optional[str]:
497458
"""Resolve the quota subject identifier based on rlsapi_v1 configuration.
498459
@@ -752,7 +713,9 @@ async def infer_endpoint( # pylint: disable=R0914,R0915
752713
)
753714

754715
start_time = time.monotonic()
755-
verbose_enabled = _is_verbose_enabled(infer_request)
716+
verbose_enabled = (
717+
configuration.rlsapi_v1.allow_verbose_infer and infer_request.include_metadata
718+
)
756719
logger.info(
757720
"Starting LLM call for rlsapi v1 request %s with verbose metadata enabled: %s",
758721
request_id,

0 commit comments

Comments
 (0)