Skip to content

Commit d454817

Browse files
committed
fix: plumb LLM token counts into rlsapi Splunk telemetry
total_llm_tokens was hardcoded to 0 in the rlsapi Splunk event builder despite token counting being implemented via extract_token_usage(). Add input_tokens and output_tokens to InferenceEventData and pass actual counts from the endpoint handler.

Ref: RSPEED-2857
Signed-off-by: Major Hayden <major@redhat.com>
1 parent fe35459 commit d454817

3 files changed

Lines changed: 36 additions & 2 deletions

File tree

src/app/endpoints/rlsapi_v1.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,8 @@ def _queue_splunk_event( # pylint: disable=too-many-arguments,too-many-position
353353
response_text: str,
354354
inference_time: float,
355355
sourcetype: str,
356+
input_tokens: int = 0,
357+
output_tokens: int = 0,
356358
) -> None:
357359
"""Build and queue a Splunk telemetry event for background sending."""
358360
org_id, system_id = _get_rh_identity_context(request)
@@ -370,6 +372,8 @@ def _queue_splunk_event( # pylint: disable=too-many-arguments,too-many-position
370372
system_os=systeminfo.os,
371373
system_version=systeminfo.version,
372374
system_arch=systeminfo.arch,
375+
input_tokens=input_tokens,
376+
output_tokens=output_tokens,
373377
)
374378

375379
event = build_inference_event(event_data)
@@ -754,6 +758,8 @@ async def infer_endpoint( # pylint: disable=R0914
754758
response_text,
755759
inference_time,
756760
"infer_with_llm",
761+
input_tokens=token_usage.input_tokens,
762+
output_tokens=token_usage.output_tokens,
757763
)
758764

759765
logger.info("Completed rlsapi v1 /infer request %s", request_id)

src/observability/formats/rlsapi.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ class InferenceEventData: # pylint: disable=too-many-instance-attributes
2626
system_os: str
2727
system_version: str
2828
system_arch: str
29+
input_tokens: int = 0
30+
output_tokens: int = 0
2931

3032

3133
def build_inference_event(data: InferenceEventData) -> dict[str, Any]:
@@ -47,8 +49,7 @@ def build_inference_event(data: InferenceEventData) -> dict[str, Any]:
4749
"deployment": configuration.deployment_environment,
4850
"org_id": data.org_id,
4951
"system_id": data.system_id,
50-
# Token counting not yet implemented in lightspeed-stack; rlsapi uses 0 as default
51-
"total_llm_tokens": 0,
52+
"total_llm_tokens": data.input_tokens + data.output_tokens,
5253
"request_id": data.request_id,
5354
"cla_version": data.cla_version,
5455
"system_os": data.system_os,

tests/unit/observability/formats/test_rlsapi.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,33 @@ def test_builds_event_with_all_fields(
5050
assert event["total_llm_tokens"] == 0
5151

5252

53+
def test_builds_event_with_token_counts(mocker: MockerFixture) -> None:
54+
"""Test total_llm_tokens is computed from input and output token counts."""
55+
mocker.patch(
56+
"observability.formats.rlsapi.configuration"
57+
).deployment_environment = "production"
58+
59+
data = InferenceEventData(
60+
question="test",
61+
response="test",
62+
inference_time=1.0,
63+
model="test-model",
64+
org_id="org1",
65+
system_id="sys1",
66+
request_id="req_1",
67+
cla_version="CLA/1.0",
68+
system_os="RHEL",
69+
system_version="9.4",
70+
system_arch="x86_64",
71+
input_tokens=150,
72+
output_tokens=75,
73+
)
74+
75+
event = build_inference_event(data)
76+
77+
assert event["total_llm_tokens"] == 225
78+
79+
5380
def test_handles_auth_disabled_values(mocker: MockerFixture) -> None:
5481
"""Test event handles auth_disabled placeholder values."""
5582
data = InferenceEventData(

0 commit comments

Comments
 (0)