Skip to content

Commit ab47686

Browse files
committed
LCORE-1137: store info about consumed tokens into token usage history
1 parent 049a7ed commit ab47686

3 files changed

Lines changed: 23 additions & 0 deletions

File tree

src/app/endpoints/query.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,9 +387,12 @@ async def query_endpoint_handler_base( # pylint: disable=R0914
387387

388388
consume_tokens(
389389
configuration.quota_limiters,
390+
configuration.token_usage_history,
390391
user_id,
391392
input_tokens=token_usage.input_tokens,
392393
output_tokens=token_usage.output_tokens,
394+
model_id=model_id,
395+
provider_id=provider_id,
393396
)
394397

395398
store_conversation_into_cache(

src/app/endpoints/streaming_query_v2.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,9 +275,12 @@ async def response_generator( # pylint: disable=too-many-branches,too-many-stat
275275
)
276276
consume_tokens(
277277
configuration.quota_limiters,
278+
configuration.token_usage_history,
278279
context.user_id,
279280
input_tokens=token_usage.input_tokens,
280281
output_tokens=token_usage.output_tokens,
282+
model_id=context.model_id,
283+
provider_id=context.provider_id,
281284
)
282285
referenced_documents = parse_referenced_documents_from_responses_api(
283286
cast(OpenAIResponseObject, latest_response_object)

src/utils/quota.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,27 @@
11
"""Quota handling helper functions."""
22

3+
from typing import Optional
4+
35
import psycopg2
46
from fastapi import HTTPException
57

68
from log import get_logger
79
from models.responses import InternalServerErrorResponse, QuotaExceededResponse
810
from quota.quota_exceed_error import QuotaExceedError
911
from quota.quota_limiter import QuotaLimiter
12+
from quota.token_usage_history import TokenUsageHistory
1013

1114
logger = get_logger(__name__)
1215

1316

1417
def consume_tokens(
1518
quota_limiters: list[QuotaLimiter],
19+
token_usage_history: Optional[TokenUsageHistory],
1620
user_id: str,
1721
input_tokens: int,
1822
output_tokens: int,
23+
model_id: str,
24+
provider_id: str,
1925
) -> None:
2026
"""Consume tokens from cluster and/or user quotas.
2127
@@ -24,10 +30,21 @@ def consume_tokens(
2430
user_id: Identifier of the user consuming tokens.
2531
input_tokens: Number of input tokens to consume.
2632
output_tokens: Number of output tokens to consume.
33+
model_id: Identifier of the model, recorded in the token usage history.
34+
provider_id: Identifier of the provider, recorded in the token usage history.
2735
2836
Returns:
2937
None
3038
"""
39+
# record token usage history
40+
if token_usage_history is not None:
41+
token_usage_history.consume_tokens(
42+
user_id=user_id,
43+
provider=provider_id,
44+
model=model_id,
45+
input_tokens=input_tokens,
46+
output_tokens=output_tokens,
47+
)
3148
# consume tokens from all configured quota limiters
3249
for quota_limiter in quota_limiters:
3350
quota_limiter.consume_tokens(

0 commit comments

Comments
 (0)