Skip to content

Commit 657dcde

Browse files
authored
Merge pull request lightspeed-core#958 from tisnik/lcore-1137-store-info-into-token-usage-history
LCORE-1137: store info about consumed tokens into token usage history
2 parents 049a7ed + 12d6728 commit 657dcde

4 files changed

Lines changed: 28 additions & 1 deletion

File tree

src/app/endpoints/query.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,9 +387,12 @@ async def query_endpoint_handler_base( # pylint: disable=R0914
387387

388388
consume_tokens(
389389
configuration.quota_limiters,
390+
configuration.token_usage_history,
390391
user_id,
391392
input_tokens=token_usage.input_tokens,
392393
output_tokens=token_usage.output_tokens,
394+
model_id=model_id,
395+
provider_id=provider_id,
393396
)
394397

395398
store_conversation_into_cache(

src/app/endpoints/streaming_query_v2.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,9 +275,12 @@ async def response_generator( # pylint: disable=too-many-branches,too-many-stat
275275
)
276276
consume_tokens(
277277
configuration.quota_limiters,
278+
configuration.token_usage_history,
278279
context.user_id,
279280
input_tokens=token_usage.input_tokens,
280281
output_tokens=token_usage.output_tokens,
282+
model_id=context.model_id,
283+
provider_id=context.provider_id,
281284
)
282285
referenced_documents = parse_referenced_documents_from_responses_api(
283286
cast(OpenAIResponseObject, latest_response_object)

src/utils/quota.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,52 @@
11
"""Quota handling helper functions."""
22

3+
from typing import Optional
4+
35
import psycopg2
46
from fastapi import HTTPException
57

68
from log import get_logger
79
from models.responses import InternalServerErrorResponse, QuotaExceededResponse
810
from quota.quota_exceed_error import QuotaExceedError
911
from quota.quota_limiter import QuotaLimiter
12+
from quota.token_usage_history import TokenUsageHistory
1013

1114
logger = get_logger(__name__)
1215

1316

17+
# pylint: disable=R0913,R0917
1418
def consume_tokens(
1519
quota_limiters: list[QuotaLimiter],
20+
token_usage_history: Optional[TokenUsageHistory],
1621
user_id: str,
1722
input_tokens: int,
1823
output_tokens: int,
24+
model_id: str,
25+
provider_id: str,
1926
) -> None:
2027
"""Consume tokens from cluster and/or user quotas.
2128
2229
Parameters:
2330
quota_limiters: List of quota limiter instances to consume tokens from.
31+
token_usage_history: Optional TokenUsageHistory instance used to record consumed tokens.
2432
user_id: Identifier of the user consuming tokens.
2533
input_tokens: Number of input tokens to consume.
2634
output_tokens: Number of output tokens to consume.
35+
model_id: Identifier of the model.
36+
provider_id: Identifier of the provider.
2737
2838
Returns:
2939
None
3040
"""
41+
# record token usage history
42+
if token_usage_history is not None:
43+
token_usage_history.consume_tokens(
44+
user_id=user_id,
45+
provider=provider_id,
46+
model=model_id,
47+
input_tokens=input_tokens,
48+
output_tokens=output_tokens,
49+
)
3150
# consume tokens from all configured quota limiters
3251
for quota_limiter in quota_limiters:
3352
quota_limiter.consume_tokens(

tests/integration/endpoints/test_query_v2_integration.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1150,7 +1150,9 @@ async def test_query_v2_endpoint_quota_integration(
11501150
mock_consume.assert_called_once()
11511151
consume_args = mock_consume.call_args
11521152
user_id, _, _, _ = test_auth
1153-
assert consume_args.args[1] == user_id
1153+
assert consume_args.args[2] == user_id
1154+
assert consume_args.kwargs["model_id"] == "test-model"
1155+
assert consume_args.kwargs["provider_id"] == "test-provider"
11541156
assert consume_args.kwargs["input_tokens"] == 100
11551157
assert consume_args.kwargs["output_tokens"] == 50
11561158

0 commit comments

Comments
 (0)