@@ -112,17 +112,23 @@ def _build_instructions(systeminfo: RlsapiV1SystemInfo) -> str:
112112 Returns:
113113 The rendered instructions string for the LLM.
114114 """
115+ prompt = (
116+ configuration .customization .system_prompt
117+ if configuration .customization is not None
118+ and configuration .customization .system_prompt is not None
119+ else constants .DEFAULT_SYSTEM_PROMPT
120+ )
115121 date_today = datetime .now (tz = UTC ).strftime ("%B %d, %Y" )
116122
117- return _get_prompt_template ( ).render (
123+ return _compile_prompt_template ( prompt ).render (
118124 date = date_today ,
119125 os = systeminfo .os or "" ,
120126 version = systeminfo .version or "" ,
121127 arch = systeminfo .arch or "" ,
122128 )
123129
124130
125- @functools .lru_cache (maxsize = 8 )
131+ @functools .lru_cache (maxsize = 1 )
126132def _compile_prompt_template (prompt : str ) -> jinja2 .Template :
127133 """Compile a Jinja2 template string inside a SandboxedEnvironment.
128134
@@ -147,25 +153,6 @@ def _compile_prompt_template(prompt: str) -> jinja2.Template:
147153 ) from exc
148154
149155
150- def _get_prompt_template () -> jinja2 .Template :
151- """Resolve the system prompt from configuration and return the compiled template.
152-
153- Delegates to the cached ``_compile_prompt_template`` so that identical
154- prompt text is compiled only once, while configuration changes are
155- picked up automatically.
156-
157- Returns:
158- The compiled Jinja2 Template ready for rendering.
159- """
160- prompt = (
161- configuration .customization .system_prompt
162- if configuration .customization is not None
163- and configuration .customization .system_prompt is not None
164- else constants .DEFAULT_SYSTEM_PROMPT
165- )
166- return _compile_prompt_template (prompt )
167-
168-
169156async def _get_default_model_id () -> str :
170157 """Get the default model ID from configuration or auto-discovery.
171158
@@ -305,18 +292,6 @@ async def _call_llm(
305292 return cast (OpenAIResponseObject , response )
306293
307294
308- def _get_cla_version (request : Request ) -> str :
309- """Extract CLA version from User-Agent header."""
310- return request .headers .get ("User-Agent" , "" )
311-
312-
313- def _get_configured_default_model_name () -> str :
314- """Get configured default model name for telemetry payloads."""
315- if configuration .inference is None :
316- return ""
317- return configuration .inference .default_model or ""
318-
319-
320295def _queue_splunk_event ( # pylint: disable=too-many-arguments,too-many-positional-arguments
321296 background_tasks : BackgroundTasks ,
322297 infer_request : RlsapiV1InferRequest ,
@@ -348,11 +323,15 @@ def _queue_splunk_event( # pylint: disable=too-many-arguments,too-many-position
348323 question = infer_request .question ,
349324 response = response_text ,
350325 inference_time = inference_time ,
351- model = _get_configured_default_model_name (),
326+ model = (
327+ (configuration .inference .default_model or "" )
328+ if configuration .inference is not None
329+ else ""
330+ ),
352331 org_id = org_id ,
353332 system_id = system_id ,
354333 request_id = request_id ,
355- cla_version = _get_cla_version ( request ),
334+ cla_version = request . headers . get ( "User-Agent" , "" ),
356335 system_os = systeminfo .os ,
357336 system_version = systeminfo .version ,
358337 system_arch = systeminfo .arch ,
@@ -475,24 +454,6 @@ def _record_inference_failure( # pylint: disable=too-many-arguments,too-many-po
475454 return inference_time
476455
477456
478- def _is_verbose_enabled (infer_request : RlsapiV1InferRequest ) -> bool :
479- """Check whether verbose metadata should be included in the response.
480-
481- Verbose mode requires dual opt-in: the server configuration must allow it
482- via ``allow_verbose_infer``, and the client must request it via the
483- ``include_metadata`` field.
484-
485- Args:
486- infer_request: The inference request to check.
487-
488- Returns:
489- True if both server config and client request enable verbose mode.
490- """
491- return (
492- configuration .rlsapi_v1 .allow_verbose_infer and infer_request .include_metadata
493- )
494-
495-
496457def _resolve_quota_subject (request : Request , auth : AuthTuple ) -> Optional [str ]:
497458 """Resolve the quota subject identifier based on rlsapi_v1 configuration.
498459
@@ -752,7 +713,9 @@ async def infer_endpoint( # pylint: disable=R0914,R0915
752713 )
753714
754715 start_time = time .monotonic ()
755- verbose_enabled = _is_verbose_enabled (infer_request )
716+ verbose_enabled = (
717+ configuration .rlsapi_v1 .allow_verbose_infer and infer_request .include_metadata
718+ )
756719 logger .info (
757720 "Starting LLM call for rlsapi v1 request %s with verbose metadata enabled: %s" ,
758721 request_id ,
0 commit comments