Skip to content

Commit 6ffde9c

Browse files
JenniferWangfacebook-github-bot
authored andcommitted
add tracer in v1 to log generator perf metrics
Summary: ## tl;dr Add tracer in v1 to log perf metrics to wandb ## V0 vs V1 Metrics Parity Comparison | Category | v0 Metric | v1 Metric | Parity | |----------|-----------|-----------|--------| | **Generate - Request Count** | `generator/generate/count_requests` (SUM) | `generator/generate/count_requests` (SUM) | ✅ Same | | **Generate - Completion Count** | `generator/generate/count_sequences_completed` (SUM) | `generator/generate/count_sequences_completed` (SUM) | ✅ Same | | **Generate - E2E Timing** | `generator_perf/generate/*` (Tracer, GPU) | `generator_perf/generate/*` (Tracer, GPU) | ✅ Same | | **Update - Pending Requests** | `generator_perf/update_weights/sum_pending_gen_requests` (SUM) | N/A - AsyncLLM handles internally | ⚠️ Skip (by design) | | **Update - Wait for Generation** | `generator_perf/update_weights/avg_waiting_for_generation_duration_s` (MEAN) | `generator_perf/update_weights/pause_generation_duration_s` (MEAN) | ✅ Equivalent - renamed for clarity | | **Update - Fetch Weights** | `generator_perf/update_weights/wait_fetch_weights` (MEAN) | `generator_perf/update_weights/worker_load_weights_duration_s` (MEAN) | ✅ Equivalent - renamed for clarity | | **Worker - Update Timing** | `generator_perf/update_weights/generator_worker_update/*` (trace, GPU) | `generator_perf/update_weights/generator_worker_update/*` (trace, GPU) | ✅ Same | ## Test Plan Main GRPO app: `python -m apps.grpo.main --config apps/grpo/qwen3_1_7b.yaml` ``` wandb: Run `wandb offline` to turn off syncing. wandb: Syncing run drawn-waterfall-686 wandb: ⭐️ View project at https://meta.wandb.io/jiyue/grpo-training wandb: 🚀 View run at https://meta.wandb.io/jiyue/grpo-training/runs/6pltx38p wandb: Detected [openai] in use. .... rvability.metric_actors.GlobalLoggingActor global_logger>] === [global_reduce] - METRICS STEP 1 === ... generator/generate/count_requests: 13.0 generator/generate/count_sequences_completed: 96.0 generator_perf/generate/total_duration_avg_s: 3.6518315022786463 generator_perf/generate/total_duration_max_s: 9.2080615234375 generator_perf/update_weights/pause_generation_duration_s: 2.8634108749683946 generator_perf/update_weights/resume_generation_duration_s: 1.918897032737732e-05 generator_perf/update_weights/worker_load_weights_duration_s: 3.506648204056546 ... ``` Make sure integration tests that do not initialize the tracer still works `pytest tests/integration_tests/test_generator_lifecycle.py -v -s` ## Next Steps [ ] implement the prefetch logic & shared memory [-] Add metric similar to generator v0 [ ] Perf/Throughput testing compared to generator v0 Differential Revision: D91038187
1 parent 981a5d6 commit 6ffde9c

2 files changed

Lines changed: 39 additions & 1 deletion

File tree

src/forge/actors/vllm/v1/forge_executor.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import cloudpickle
3232
from forge.actors._torchstore_utils import extract_param_name, get_param_prefix
3333
from forge.actors.vllm.v1.monarch_executor import MonarchExecutor, WorkerWrapper
34+
from forge.observability.perf_tracker import trace
3435
from monarch.actor import endpoint
3536
from torchstore.client import LocalClient
3637

@@ -57,6 +58,11 @@ def set_torchstore_controller(self, controller) -> None:
5758
self._torchstore_client = None # Reset cached client
5859

5960
@endpoint
61+
@trace(
62+
prefix="generator_perf/update_weights/generator_worker_update",
63+
track_memory=False,
64+
timer="gpu",
65+
)
6066
def update_weights(self, version: int) -> int:
6167
"""Load weights directly from torchstore.
6268

src/forge/actors/vllm/v1/generator.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import base64
1010
import logging
1111
import os
12+
import time
1213
import uuid
1314
from collections.abc import Mapping
1415
from dataclasses import dataclass, field
@@ -19,6 +20,10 @@
1920
from forge.controller import ForgeActor
2021
from forge.controller.provisioner import _get_provisioner
2122
from forge.data_models.completion import Completion
23+
from forge.env import FORGE_DISABLE_METRICS
24+
from forge.observability.metric_actors import get_or_create_metric_logger
25+
from forge.observability.metrics import record_metric, Reduce
26+
from forge.observability.perf_tracker import Tracer
2227
from monarch.actor import endpoint, this_host
2328
from torchstore.api import _controller as get_torchstore_controller
2429
from vllm.engine.arg_utils import EngineArgs
@@ -136,6 +141,10 @@ async def launch( # pyright: ignore[reportIncompatibleMethodOverride]
136141
)
137142
logger.info("[Generator.launch] Spawned generator_proc on head host")
138143

144+
# Register LocalFetcherActor for generator_proc to enable metrics collection
145+
if not FORGE_DISABLE_METRICS.get_value():
146+
await get_or_create_metric_logger(generator_proc, process_name=mesh_name)
147+
139148
# Import WorkerRegistry here to avoid circular import with monarch_executor
140149
from forge.actors.vllm.v1.monarch_executor import WorkerRegistry
141150

@@ -251,6 +260,10 @@ async def generate(
251260
Returns:
252261
list[Completion]: n completions from vLLM based on your prompt.
253262
"""
263+
t = Tracer("generator_perf/generate", timer="gpu")
264+
t.start()
265+
record_metric("generator/generate/count_requests", 1, Reduce.SUM)
266+
254267
if self.llm is None:
255268
raise RuntimeError("Generator not initialized. Call setup() first.")
256269

@@ -271,6 +284,12 @@ async def generate(
271284

272285
completions = self._to_completions(request_output, prompt)
273286

287+
record_metric(
288+
"generator/generate/count_sequences_completed",
289+
len(completions),
290+
Reduce.SUM,
291+
)
292+
t.stop()
274293
return completions
275294

276295
@endpoint
@@ -341,17 +360,30 @@ async def update_weights(
341360

342361
logger.info(f"Starting weight update to v{version}")
343362

363+
pause_start = time.perf_counter()
344364
await self.llm.pause_generation(
345365
wait_for_inflight_requests=True, clear_cache=True
346366
)
367+
pause_duration = time.perf_counter() - pause_start
368+
record_metric(
369+
"generator_perf/update_weights/pause_generation_duration_s",
370+
pause_duration,
371+
Reduce.MEAN,
372+
)
347373

348374
try:
375+
load_start = time.perf_counter()
349376
await self.workers.update_weights.call(version)
377+
load_duration = time.perf_counter() - load_start
378+
record_metric(
379+
"generator_perf/update_weights/worker_load_weights_duration_s",
380+
load_duration,
381+
Reduce.MEAN,
382+
)
350383
self.generator_version = version
351384
logger.info(f"Updated weights from torchstore v{version}")
352385
finally:
353386
await self.llm.resume_generation()
354-
355387
logger.info(f"Weight update complete, now v{version}")
356388

357389
@endpoint

0 commit comments

Comments
 (0)