diff --git a/fastdeploy/engine/common_engine.py b/fastdeploy/engine/common_engine.py index 6e35399f7ac..045a3a3eb9a 100644 --- a/fastdeploy/engine/common_engine.py +++ b/fastdeploy/engine/common_engine.py @@ -1339,6 +1339,14 @@ def _insert_zmq_task_to_scheduler(self): request.metrics.scheduler_recv_req_time = time.time() main_process_metrics.inc_value("requests_number") + main_process_metrics.inc_value("prompt_tokens_total", request.prompt_token_ids_len) + main_process_metrics.obs_value("request_prompt_tokens", request.prompt_token_ids_len) + if getattr(request, "sampling_params", None) and getattr( + request.sampling_params, "max_tokens", None + ): + main_process_metrics.obs_value( + "request_params_max_tokens", request.sampling_params.max_tokens + ) trace_carrier = data.get("trace_carrier") if trace_carrier: request_id = get_base_request_id(data["request_id"]) diff --git a/fastdeploy/entrypoints/engine_client.py b/fastdeploy/entrypoints/engine_client.py index 961dcfb836f..4bc6813c54e 100644 --- a/fastdeploy/entrypoints/engine_client.py +++ b/fastdeploy/entrypoints/engine_client.py @@ -53,7 +53,6 @@ log_request, log_request_error, ) -from fastdeploy.metrics.metrics import main_process_metrics from fastdeploy.platforms import current_platform from fastdeploy.trace.constants import LoggingEventName from fastdeploy.trace.trace_logger import print as trace_print @@ -374,9 +373,7 @@ async def add_requests(self, task): if "messages" in task: task["messages"] = None - main_process_metrics.obs_value("request_params_max_tokens", task["max_tokens"]) - main_process_metrics.inc_value("prompt_tokens_total", input_ids_len) - main_process_metrics.obs_value("request_prompt_tokens", input_ids_len) + except Exception as e: log_request_error( message="request[{request_id}] add_requests error: {error}, {traceback}", diff --git a/tests/engine/test_common_engine.py b/tests/engine/test_common_engine.py index 4b539555a34..bdca2127474 100644 --- a/tests/engine/test_common_engine.py +++ b/tests/engine/test_common_engine.py @@ -1311,6 +1311,9 @@ class DummyMetrics: def __init__(self): self.requests_number = Mock(inc=Mock()) self.num_requests_waiting = Mock(inc=Mock()) + self.prompt_tokens_total = Mock(inc=Mock()) + self.request_prompt_tokens = Mock(observe=Mock()) + self.request_params_max_tokens = Mock(observe=Mock()) def inc_value(self, name, value=1, labelvalues=None): getattr(self, name).inc(value) @@ -2727,6 +2730,8 @@ def close(self): with patch("fastdeploy.engine.common_engine.Request") as MockRequest: mock_request = Mock() mock_request.metrics.scheduler_recv_req_time = 0 + mock_request.prompt_token_ids_len = 2 + mock_request.sampling_params = Mock(max_tokens=16) MockRequest.from_dict.return_value = mock_request with ( @@ -2756,6 +2761,8 @@ def close(self): with patch("fastdeploy.engine.common_engine.Request") as MockRequest: mock_request = Mock() mock_request.metrics.scheduler_recv_req_time = 0 + mock_request.prompt_token_ids_len = 2 + mock_request.sampling_params = Mock(max_tokens=16) MockRequest.from_dict.return_value = mock_request with ( @@ -3359,6 +3366,9 @@ class DummyMetrics: def __init__(self): self.requests_number = Mock(inc=Mock()) self.num_requests_waiting = Mock(inc=Mock()) + self.prompt_tokens_total = Mock(inc=Mock()) + self.request_prompt_tokens = Mock(observe=Mock()) + self.request_params_max_tokens = Mock(observe=Mock()) def inc_value(self, name, value=1, labelvalues=None): getattr(self, name).inc(value)