diff --git a/docs/design-docs/nemo-gym-integration.md b/docs/design-docs/nemo-gym-integration.md index 33e324547b..ce57c9e659 100644 --- a/docs/design-docs/nemo-gym-integration.md +++ b/docs/design-docs/nemo-gym-integration.md @@ -181,7 +181,7 @@ sequenceDiagram GRPO->>Policy: Compute loss and train ``` -> **NeMo Gym server types** (see [Core Components](https://docs.nvidia.com/nemo/gym/latest/about/concepts/core-components.html)): +> **NeMo Gym server types** (see [Core Components](https://docs.nvidia.com/nemo/gym/about/core-components)): > - **Agent Server**: Orchestrates the rollout loop > - **Model Server**: HTTP proxy to vLLM; translates Responses API ↔ Chat Completions > - **Resource Server**: Provides tools and rewards diff --git a/examples/configs/grpo_math_1B.yaml b/examples/configs/grpo_math_1B.yaml index 45582eb591..e109aa4581 100644 --- a/examples/configs/grpo_math_1B.yaml +++ b/examples/configs/grpo_math_1B.yaml @@ -391,3 +391,17 @@ logger: cluster: gpus_per_node: 1 num_nodes: 1 + +# TransferQueue-mediated data plane for sync GRPO. +# Off by default — the legacy grpo_train trainer never engages this. +# Flip enabled=true and run grpo_train_sync to use TQ-mediated bulk +# transfer between rollout and train. See nemo_rl/data_plane/README.md. +data_plane: + enabled: false + impl: transfer_queue + # backend: "simple" # NotRequired: TQ storage backend ('simple' or 'mooncake_cpu') + # storage_capacity: 1000000 # NotRequired + # num_storage_units: 2 # NotRequired + # claim_meta_poll_interval_s: 0.5 # NotRequired: blocking-claim poll cadence + # observability: # NotRequired + # enabled: false diff --git a/examples/run_grpo.py b/examples/run_grpo.py index b8f6025067..259491f734 100644 --- a/examples/run_grpo.py +++ b/examples/run_grpo.py @@ -99,6 +99,20 @@ def main() -> None: val_task_to_env, ) = setup_response_data(tokenizer, config["data"], config["env"]) + # Pick the policy factory at the launcher level so the legacy trainer + # stays data-plane-agnostic (architectural invariant — see + # tests/data_plane/unit/test_architecture_invariants.py). + _dp_cfg = config.get("data_plane") or {} + if _dp_cfg.get("enabled", False): + from nemo_rl.models.policy.tq_policy import TQPolicy + + def _make_policy(**kwargs): + return TQPolicy(**kwargs, dp_cfg=_dp_cfg) + + _policy_factory = _make_policy + else: + _policy_factory = None # setup() defaults to plain Policy + ( policy, policy_generation, @@ -110,7 +124,13 @@ def main() -> None: checkpointer, grpo_state, master_config, - ) = setup(config, tokenizer, dataset, val_dataset) + ) = setup( + config, + tokenizer, + dataset, + val_dataset, + policy_factory=_policy_factory, + ) # Check if async mode is enabled if "async_grpo" in config["grpo"] and config["grpo"]["async_grpo"]["enabled"]: @@ -164,10 +184,22 @@ def main() -> None: max_trajectory_age_steps=async_config["max_trajectory_age_steps"], ) else: - print("šŸš€ Running synchronous GRPO training") - - # Run standard GRPO training - grpo_train( + # Two parallel synchronous trainers (verl-style — main_ppo.py vs + # main_ppo_sync.py). data_plane.enabled selects which one runs: + # the legacy in-memory path or the TransferQueue-mediated fork. + # Same model, same data, same seed → diff the wandb runs to + # validate parity. 
+ dp_cfg = master_config.get("data_plane", {}) + if dp_cfg.get("enabled", False): + from nemo_rl.algorithms.grpo_sync import grpo_train_sync + + print("šŸš€ Running synchronous GRPO training (TransferQueue)") + trainer = grpo_train_sync + else: + print("šŸš€ Running synchronous GRPO training (legacy)") + trainer = grpo_train + + trainer( policy, policy_generation, dataloader, diff --git a/nemo_rl/algorithms/grpo.py b/nemo_rl/algorithms/grpo.py index 8dbd9afbd4..06981b9c5f 100644 --- a/nemo_rl/algorithms/grpo.py +++ b/nemo_rl/algorithms/grpo.py @@ -17,7 +17,7 @@ import warnings from concurrent.futures import ThreadPoolExecutor from contextlib import nullcontext -from typing import Any, NotRequired, Optional, TypedDict, TypeVar, cast +from typing import Any, Callable, NotRequired, Optional, TypedDict, TypeVar, cast import numpy as np import ray @@ -58,6 +58,7 @@ get_keys_from_message_log, ) from nemo_rl.data.utils import extract_necessary_env_names +from nemo_rl.data_plane.interfaces import DataPlaneConfig from nemo_rl.distributed.batched_data_dict import BatchedDataDict from nemo_rl.distributed.ray_actor_environment_registry import get_actor_python_env from nemo_rl.distributed.virtual_cluster import ClusterConfig, RayVirtualCluster @@ -206,6 +207,7 @@ class MasterConfig(TypedDict): logger: GRPOLoggerConfig cluster: ClusterConfig checkpointing: CheckpointingConfig + data_plane: NotRequired[DataPlaneConfig] # =============================================================================== @@ -219,6 +221,7 @@ def setup( dataset: AllTaskProcessedDataset | dict[str, AllTaskProcessedDataset], val_dataset: Optional[AllTaskProcessedDataset], processor: Optional[AutoProcessor] = None, + policy_factory: Optional[Callable[..., ColocatablePolicyInterface]] = None, ) -> tuple[ ColocatablePolicyInterface, Optional[GenerationInterface], @@ -582,10 +585,15 @@ def init_train_dataloader(dataset, suffix: str = ""): "(reference model is not loaded)." ) + # Caller-supplied factory lets the sync trainer swap in a TQ-mediated + # Policy subclass without this shared setup needing to know the data + # plane exists. Default is the plain Policy class — legacy behavior. + _make_policy = policy_factory if policy_factory is not None else Policy + def init_policy(): """Initialize policy training workers.""" t0 = time.perf_counter() - p = Policy( + p = _make_policy( cluster=train_cluster, config=policy_config, tokenizer=tokenizer, @@ -2554,7 +2562,7 @@ def async_grpo_train( ) replay_buffer = ReplayBuffer.options(runtime_env=_replay_runtime_env).remote( - max_size=optimal_buffer_size + max_size=optimal_buffer_size, ) _tc_py_exec = get_actor_python_env( diff --git a/nemo_rl/algorithms/grpo_sync.py b/nemo_rl/algorithms/grpo_sync.py new file mode 100644 index 0000000000..59b042cc32 --- /dev/null +++ b/nemo_rl/algorithms/grpo_sync.py @@ -0,0 +1,1137 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""GRPO trainer — TransferQueue-mediated path (sync). 
+ +Sibling fork of ``nemo_rl.algorithms.grpo``. Each file has zero +internal branching on whether TQ is engaged; the example script +chooses one or the other based on ``data_plane.enabled``. + +Setup, helpers, and ``validate`` are re-imported from ``grpo``; only the +training loop body is duplicated here so the per-step lifecycle hooks +(register / seed-put / per-rank fetch / clear) can live in straight +sequential code. + +Parity with the legacy path is verified by running the same config +against both entrypoints and diffing the wandb runs. +""" + +from __future__ import annotations + +import os +import uuid +import warnings +from typing import Any, Optional + +import numpy as np +import ray +import torch +from torchdata.stateful_dataloader import StatefulDataLoader + +# Re-imports from grpo so this file is a thin trainer-only fork. +from nemo_rl.algorithms.grpo import ( + GRPOSaveState, + MasterConfig, + _create_advantage_estimator, + _log_mixed_rewards_and_advantages_information, + _should_log_nemo_gym_responses, + compute_and_apply_seq_logprob_error_masking, + refit_policy_generation, + scale_rewards, + validate, +) +from nemo_rl.algorithms.loss import ( + ClippedPGLossDataDict, +) +from nemo_rl.algorithms.loss.interfaces import LossFunction +from nemo_rl.algorithms.reward_functions import apply_reward_shaping +from nemo_rl.algorithms.utils import ( + calculate_baseline_and_std_per_prompt, + get_gdpo_reward_component_keys, + log_generation_metrics_to_wandb, + print_performance_metrics, +) +from nemo_rl.data.interfaces import DatumSpec +from nemo_rl.data.llm_message_utils import batched_message_log_to_flat_message +from nemo_rl.data_plane.column_io import read_columns, write_columns +from nemo_rl.data_plane.interfaces import DataPlaneClient, KVBatchMeta +from nemo_rl.data_plane.schema import DP_CALIB_EXCLUDED_FIELDS +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.environments.interfaces import EnvironmentInterface +from nemo_rl.experience.sync_rollout_actor import SyncRolloutActor +from nemo_rl.models.generation.interfaces import GenerationInterface +from nemo_rl.models.policy.interfaces import ColocatablePolicyInterface +from nemo_rl.utils.checkpoint import CheckpointManager +from nemo_rl.utils.logger import Logger +from nemo_rl.utils.memory_tracker import MemoryTracker +from nemo_rl.utils.nsys import maybe_gpu_profile_step +from nemo_rl.utils.timer import TimeoutChecker, Timer +from nemo_rl.utils.venvs import make_actor_runtime_env + +# ── DAPO non-zero-std dynamic sampling, slice-only ───────────────────── +# Slice-only formulation of nemo_rl.algorithms.grpo.dynamic_sampling: filter +# on std != 0, accumulate survivors across iterations, slice on overflow. +# Bulk in TQ untouched except for kv_clear of dropped/discarded uids. + +_DSlice = BatchedDataDict[Any] + + +def _apply_dynamic_sampling( + *, + meta: KVBatchMeta, + slice_data: _DSlice, + pending_meta: Optional[KVBatchMeta], + pending_slice: Optional[_DSlice], + pending_unfiltered_rewards: list[torch.Tensor], + train_prompts_size: int, + num_gen_batches: int, + max_gen_batches: int, + dp_client: DataPlaneClient, +) -> tuple[ + Optional[KVBatchMeta], + Optional[_DSlice], + list[torch.Tensor], + bool, + dict[str, Any], + Optional[torch.Tensor], +]: + """Process one dynamic-sampling iteration. + + Drops zero-std (filtered) keys, merges survivors into the running + pending cache, and reports whether the cache has reached + ``train_prompts_size``. 
When complete, the returned ``pending_*`` IS + the training batch. + + Args: + meta: This iteration's ``KVBatchMeta``. + slice_data: Per-sample driver-side slice for this iteration. + pending_meta: Survivors accumulated from prior iterations. + pending_slice: Slice data for ``pending_meta``. + pending_unfiltered_rewards: All iterations' rewards pre-filter, + for legacy reward metric parity. + train_prompts_size: Target batch size. + num_gen_batches: Iteration counter (1-based). + max_gen_batches: Upper bound on iterations before raising. + dp_client: Data-plane client used to clear filtered keys. + + Returns: + ``(pending_meta, pending_slice, pending_rewards, is_complete, + ds_metrics, unfiltered_for_log)``. + """ + # Cumulative unfiltered total_reward for legacy metrics["reward"] + # parity. Reference-only append (no copy) — slice tensors are + # produced fresh per iteration, not aliased to TQ-owned bulk. + pending_unfiltered_rewards.append(slice_data["total_reward"]) + + keep_mask = slice_data["std"] != 0.0 + keep_idx = keep_mask.nonzero(as_tuple=True)[0].tolist() + drop_keys = [k for k, keep in zip(meta.keys, keep_mask.tolist()) if not keep] + if drop_keys: + dp_client.kv_clear(keys=drop_keys, partition_id=meta.partition_id) + + # Subset this iteration's survivors and merge into the running cache. + if keep_idx: + km = meta.subset(keep_idx) + ks = slice_data.select_indices(keep_idx) + ks["filtered_reward"] = ks["total_reward"] + if pending_meta is None: + pending_meta, pending_slice = km, ks + else: + assert pending_slice is not None + pending_meta = pending_meta.concat(km) + pending_slice = BatchedDataDict.from_batches([pending_slice, ks]) + + n = len(pending_meta.keys) if pending_meta is not None else 0 + if n < train_prompts_size: + if num_gen_batches > max_gen_batches: + raise ValueError( + f"Dynamic sampling reached max_gen_batches={max_gen_batches}. " + f"Increase grpo.dynamic_sampling_max_gen_batches or revisit " + f"data diversity / num_prompts_per_step / num_generations_per_prompt." + ) + return pending_meta, pending_slice, pending_unfiltered_rewards, False, {}, None + + ds_metrics: dict[str, Any] = {"dynamic_sampling_num_gen_batches": num_gen_batches} + if n > train_prompts_size: + assert pending_meta is not None and pending_slice is not None + dp_client.kv_clear( + keys=list(pending_meta.keys[train_prompts_size:]), + partition_id=pending_meta.partition_id, + ) + pending_meta = pending_meta.slice(0, train_prompts_size) + pending_slice = pending_slice.slice(0, train_prompts_size) + ds_metrics["dynamic_sampling_num_discarded_valid_samples"] = ( + n - train_prompts_size + ) + + unfiltered_for_log = torch.cat(pending_unfiltered_rewards)[:train_prompts_size] + return pending_meta, pending_slice, [], True, ds_metrics, unfiltered_for_log + + +def grpo_train_sync( + policy: ColocatablePolicyInterface, + policy_generation: Optional[GenerationInterface], + wrapped_dataloader, + val_dataloader: Optional[StatefulDataLoader], + tokenizer, + loss_fn: LossFunction, + task_to_env: dict[str, EnvironmentInterface], + val_task_to_env: Optional[dict[str, EnvironmentInterface]], + logger: Logger, + checkpointer: CheckpointManager, + grpo_save_state: GRPOSaveState, + master_config: MasterConfig, +) -> None: + """Run GRPO training algorithm — TransferQueue-mediated. + + Body mirrors :func:`nemo_rl.algorithms.grpo.grpo_train` with TQ-mediated + Policy methods substituting the in-memory dispatch. 
The TQ lifecycle + (controller bootstrap, worker attach, partition register, fan-out, + drain, close) is fully encapsulated in + :class:`nemo_rl.models.policy.tq_policy.TQPolicy` — this trainer just + calls ``policy.prepare_step``, ``policy.get_logprobs``, + ``policy.get_reference_policy_logprobs``, and ``policy.train``. + + Parity with the legacy path is verified by running the same config + against both entrypoints and diffing the wandb runs. + """ + timer = Timer() + timeout = TimeoutChecker( + timeout=master_config["checkpointing"]["checkpoint_must_save_by"], + fit_last_save_time=True, + ) + timeout.start_iterations() + memory_tracker = MemoryTracker() + + kv_scales_cache = None # Cache reused for computed kv scales + + NEED_REFIT = True + # If policy_generation is None, use the policy as the generation interface (megatron framework backend) + if policy_generation is None: + policy_generation = policy # type: ignore + NEED_REFIT = False + POLICY_GENERATION_STALE = True + assert policy_generation is not None # for mypy type check + + if master_config["grpo"].get("skip_reference_policy_logprobs_calculation"): + assert master_config["loss_fn"]["reference_policy_kl_penalty"] == 0 + print( + "Reference policy logprob calculation will be skipped since `grpo.skip_reference_policy_logprobs_calculation` is set to True and `loss_fn.reference_policy_kl_penalty` is 0." + ) + + sync_kv_scales = getattr(policy_generation, "requires_kv_scale_sync", False) + + current_step = grpo_save_state["current_step"] + total_steps = grpo_save_state["total_steps"] + max_num_steps = master_config["grpo"]["max_num_steps"] + current_epoch = grpo_save_state["current_epoch"] + max_num_epochs = master_config["grpo"]["max_num_epochs"] + consumed_samples = grpo_save_state["consumed_samples"] + total_valid_tokens = grpo_save_state.get("total_valid_tokens", 0) + val_at_start = master_config["grpo"]["val_at_start"] + val_at_end = master_config["grpo"]["val_at_end"] + val_period = master_config["grpo"]["val_period"] + colocated_inference = master_config["policy"]["generation"]["colocated"]["enabled"] + + adv_estimator = _create_advantage_estimator(master_config) + + # ── Data-plane setup (mandatory in the sync trainer) ─────────────── + # Sync trainer requires a TQ-mediated policy. The TQPolicy ctor + # bootstraps the controller and attaches workers; ``policy.dp_cfg`` + # is the public marker. The explicit master_config check is the + # entry-guard so users running this trainer with the legacy policy + # see a clear error rather than an opaque AttributeError. + dp_cfg = master_config.get("data_plane") + if not dp_cfg or not dp_cfg["enabled"]: + raise ValueError( + "grpo_train_sync requires master_config['data_plane']['enabled']=True. " + "Use the legacy nemo_rl.algorithms.grpo.grpo_train trainer if you don't " + "want TransferQueue." + ) + + # Driver-side pad-value dict for materialize() — the wire emits + # jagged tensors for variable-length token fields (input_ids, + # prompt_ids_for_adv); other fields default to pad=0. + _pad_dict = { + "input_ids": tokenizer.pad_token_id, + "prompt_ids_for_adv": tokenizer.pad_token_id, + } + if not hasattr(policy, "dp_cfg"): + raise ValueError( + "grpo_train_sync requires a TQ-mediated policy " + "(nemo_rl.models.policy.tq_policy.TQPolicy). examples/run_grpo.py " + "constructs it via the policy_factory when data_plane.enabled=True." + ) + + # TQ-resident tensors live on CPU; baseline/std are computed on the + # slice without a CUDA hop. 
The flag is a no-op here — warn so users + # don't expect it to do anything. + if master_config["grpo"].get("calculate_advantages_on_gpu"): + warnings.warn( + "grpo.calculate_advantages_on_gpu has no effect when " + "data_plane.enabled=true; baseline/std are computed on CPU " + "because TQ-resident tensors are CPU-side.", + stacklevel=2, + ) + + # ── Sync rollout actor (rollout 1-hop put) ────────────────────── + # The actor owns the multi-turn rollout loop AND post-rollout + # flatten / mask construction / prompt extraction / baseline-std / + # TQ first-write. Bulk tensors stay actor-side until kv_batch_put; + # driver receives only KVBatchMeta + small slice via Ray. + rollout_actor = SyncRolloutActor.options( + runtime_env=make_actor_runtime_env( + "nemo_rl.experience.sync_rollout_actor.SyncRolloutActor" + ), + ).remote( + policy_generation=policy_generation, + tokenizer=tokenizer, + task_to_env=task_to_env, + master_config=master_config, + dp_cfg=dp_cfg, + ) + + if val_at_start and current_step == 0: + print("\nšŸ” Running initial validation...", flush=True) + memory_tracker.snapshot_start_of_stage("Initial validation", dir()) + + if NEED_REFIT and POLICY_GENERATION_STALE: + refit_policy_generation(policy, policy_generation, colocated_inference) + POLICY_GENERATION_STALE = False + else: + policy_generation.prepare_for_generation() + val_metrics, validation_timings = validate( + policy_generation, + val_dataloader, + tokenizer, + val_task_to_env, + step=0, + master_config=master_config, + logger=logger, + ) + policy_generation.finish_generation() + logger.log_metrics(val_metrics, current_step, prefix="validation") + logger.log_metrics(validation_timings, current_step, prefix="timing/validation") + + if master_config["data"]["use_multiple_dataloader"]: + warnings.warn( + "When using multiple dataloaders, MultipleDataloaderWrapper operates as an infinite iterator. " + "As a result, grpo.max_num_epochs will be ignored, and only grpo.max_num_steps will be used." + ) + + while current_epoch < max_num_epochs and total_steps < max_num_steps: + memory_tracker.snapshot_start_of_stage("Preparing batch", dir()) + print(f"\n{'=' * 25} Epoch {current_epoch + 1}/{max_num_epochs} {'=' * 25}") + # 1-hop cross-iteration cache for dynamic_sampling: across + # multiple inner iterations we accumulate non-zero-std prompts + # until we have enough for a full training batch. The TQ + # payload of pending uids remains alive until either consumed + # by training (kv_clear at step end) or evicted on overflow. + # ``pending_unfiltered_rewards`` is logging-only — preserves + # legacy ``metrics["reward"]`` semantics (cumulative unfiltered + # total_reward across all contributing iterations). 
+ pending_meta = None + pending_slice: Optional[_DSlice] = None + pending_unfiltered_rewards: list[torch.Tensor] = [] + dynamic_sampling_num_gen_batches = 0 + + for batch in wrapped_dataloader: + metrics_logging_data: dict = {} + metrics: dict = {} + + if master_config["data"]["use_multiple_dataloader"]: + print( + f"\n{'=' * 25} Step {current_step + 1}/{max_num_steps} {'=' * 25}", + flush=True, + ) + else: + print( + f"\n{'=' * 25} Step {current_step + 1}/{min(len(wrapped_dataloader), max_num_steps)} {'=' * 25}", + flush=True, + ) + + maybe_gpu_profile_step(policy, total_steps + 1) + if policy != policy_generation: + maybe_gpu_profile_step(policy_generation, total_steps + 1) + val_metrics, validation_timings = None, None + + with timer.time("total_step_time"): + print("ā–¶ Preparing batch...", flush=True) + with timer.time("data_processing"): + repeated_batch: BatchedDataDict[DatumSpec] = ( + batch.repeat_interleave( + master_config["grpo"]["num_generations_per_prompt"] + ) + ) + + memory_tracker.snapshot_start_of_stage("Generation", dir()) + print( + f"ā–¶ Generating responses for batch of size {repeated_batch.size}...", + flush=True, + ) + with timer.time("prepare_for_generation/total"): + if NEED_REFIT and POLICY_GENERATION_STALE: + if sync_kv_scales and kv_scales_cache is None: + # KV-scale calibration uses message_log of the + # current step's PROMPTS (pre-generation), which + # is small and lives on the driver naturally. + # Unrelated to the rollout 1-hop put. + print("ā–¶ Computing KV cache scales...", flush=True) + policy.prepare_for_lp_inference() + calib_flat, calib_input_lengths = ( + batched_message_log_to_flat_message( + repeated_batch["message_log"], + pad_value_dict={ + "token_ids": tokenizer.pad_token_id + }, + make_sequence_length_divisible_by=master_config[ + "policy" + ]["make_sequence_length_divisible_by"], + ) + ) + calibration_data = BatchedDataDict[ClippedPGLossDataDict]( + { + "input_ids": calib_flat["token_ids"], + "input_lengths": calib_input_lengths, + } + ) + calibration_data.update( + calib_flat.get_multimodal_dict(as_tensors=False) + ) + calibration_data.to("cpu") + kv_scales_cache = policy.calibrate_qkv_fp8_scales( + calibration_data, include_q=True + )["layers"] + + refit_policy_generation( + policy, + policy_generation, + colocated_inference, + timer=timer, + kv_scales=kv_scales_cache if sync_kv_scales else None, + ) + POLICY_GENERATION_STALE = False + else: + if colocated_inference: + policy.offload_after_refit() + policy_generation.prepare_for_generation() + + # ── Per-step TQ partition register ───────────────────── + # Done before the rollout actor's kv_batch_put so the + # partition exists with the expected schema. + policy.prepare_step( + num_samples=int(repeated_batch.size), + group_size=master_config["grpo"]["num_generations_per_prompt"], + ) + + # ── Rollout 1-hop put: actor runs rollout + flatten + + # mask construction + prompt extraction + baseline/std, + # writes bulk to TQ in one flat kv_batch_put, returns + # only meta + small slice. Bulk never visits the driver. + dynamic_sampling_num_gen_batches += 1 + with timer.time("generation"): + n_prompts = int(repeated_batch.size) + uids = [str(uuid.uuid4()) for _ in range(n_prompts)] + + # Single Ray RPC: rollout + flatten + mask + prompt + # extraction + baseline/std + kv_batch_put + finish + # generation + logger metrics — all bundled into one + # round-trip. + # ``first_iter`` is the actor's signal to call + # ``policy_generation.snapshot_step_metrics()``. 
+ # ``dynamic_sampling_num_gen_batches`` is incremented + # to 1 just above before this branch — keep these in + # sync if either is renamed. + ( + meta, + slice_extras, + rollout_metrics, + generation_logger_metrics, + ) = ray.get( + rollout_actor.rollout_to_tq.remote( + repeated_batch, + uids=uids, + partition_id=policy.tq_partition_id, + first_iter=(dynamic_sampling_num_gen_batches == 1), + ) + ) + slice_data: _DSlice = BatchedDataDict[Any](slice_extras) + del slice_extras + + if not _should_log_nemo_gym_responses(master_config): + for key in list(rollout_metrics): + if "full_result" in key: + rollout_metrics.pop(key) + + metrics_logging_data["mean_gen_tokens_per_sample"] = ( + rollout_metrics["mean_gen_tokens_per_sample"] + ) + logger.log_metrics(rollout_metrics, total_steps + 1, prefix="train") + + # ── Per-sample driver compute on slice ──────────────── + # scale_rewards / apply_reward_shaping / overlong filter + # / baseline-std all operate on small per-sample + # tensors. Mirrors grpo_sync.py legacy layout — they + # used to be on the driver, were briefly on the actor, + # now back on the driver where they belong (no bulk + # touched by any of these ops). + with timer.time("reward_calculation"): + slice_data = scale_rewards( + slice_data, + master_config["grpo"]["reward_scaling"], + ) + if master_config["grpo"]["reward_shaping"]["enabled"]: + slice_data = apply_reward_shaping( + slice_data, + master_config["grpo"]["reward_shaping"], + ) + if master_config["grpo"]["overlong_filtering"]: + lm = slice_data["loss_multiplier"].clone() + lm[slice_data["truncated"]] = 0 + slice_data["loss_multiplier"] = lm + slice_data["baseline"], slice_data["std"] = ( + calculate_baseline_and_std_per_prompt( + slice_data["prompt_ids_for_adv"], + slice_data["total_reward"], + torch.ones_like(slice_data["total_reward"]), + leave_one_out_baseline=master_config["grpo"][ + "use_leave_one_out_baseline" + ], + ) + ) + + # ── Dynamic sampling (DAPO non-zero-std filter) ──────── + # Slice-only; bulk in TQ untouched except for kv_clear + # of dropped / overflow-discarded uids. + ds_metrics: dict = {} + unfiltered_rewards_for_logging: Optional[torch.Tensor] = None + if master_config["grpo"]["use_dynamic_sampling"]: + with timer.time("dynamic_sampling"): + train_prompts_size = ( + master_config["grpo"]["num_prompts_per_step"] + * master_config["grpo"]["num_generations_per_prompt"] + ) + ( + pending_meta, + pending_slice, + pending_unfiltered_rewards, + is_complete, + ds_metrics, + unfiltered_rewards_for_logging, + ) = _apply_dynamic_sampling( + meta=meta, + slice_data=slice_data, + pending_meta=pending_meta, + pending_slice=pending_slice, + pending_unfiltered_rewards=pending_unfiltered_rewards, + train_prompts_size=train_prompts_size, + num_gen_batches=dynamic_sampling_num_gen_batches, + max_gen_batches=master_config["grpo"][ + "dynamic_sampling_max_gen_batches" + ], + dp_client=policy.dp_client, + ) + if not is_complete: + current_size = ( + len(pending_meta.keys) + if pending_meta is not None + else 0 + ) + print( + f"Dynamic sampling: {current_size}/{train_prompts_size} " + f"non-zero-std prompts after batch " + f"{dynamic_sampling_num_gen_batches}; sampling more.", + flush=True, + ) + continue + + # Adopt the now-complete cache as this step's batch. 
+ meta = pending_meta + slice_data = pending_slice + pending_meta = None + pending_slice = None + + # ── Unpack slice (small per-sample tensors) ──────────── + rewards = ( + slice_data["filtered_reward"] + if master_config["grpo"]["use_dynamic_sampling"] + else slice_data["total_reward"] + ) + baseline = slice_data["baseline"] + std = slice_data["std"] + input_lengths = slice_data["input_lengths"] + prompt_ids_for_adv = slice_data["prompt_ids_for_adv"] + loss_multiplier = slice_data["loss_multiplier"] + truncated = slice_data["truncated"] + length = slice_data["length"] + + gen_step_metrics = {} + if hasattr(policy_generation, "get_step_metrics"): + gen_step_metrics = policy_generation.get_step_metrics() + baseline_for_log = baseline.clone() + + memory_tracker.snapshot_start_of_stage("Computing logprobs", dir()) + print("ā–¶ Preparing for logprob inference...", flush=True) + with timer.time("logprob_inference_prep"): + policy.prepare_for_lp_inference() + + print("ā–¶ Computing logprobs...", flush=True) + with timer.time("policy_and_reference_logprobs"): + # Meta-driven worker dispatch. Workers fetch their + # slice from TQ; logprob result is also written back + # to TQ as ``prev_logprobs`` / + # ``reference_policy_logprobs`` columns under + # ``meta.keys`` AND returned to the driver via Ray + # for the next compute. + _prev_lp = policy.get_logprobs_from_meta(meta, timer=timer) + prev_logprobs = _prev_lp["logprobs"] + + if not master_config["grpo"].get( + "skip_reference_policy_logprobs_calculation" + ): + _ref_lp = policy.get_reference_policy_logprobs_from_meta( + meta, + timer=timer, + ) + reference_policy_logprobs = _ref_lp["reference_logprobs"] + else: + reference_policy_logprobs = None + + # Driver pulls only the per-token columns it needs + # for masking / advantage. Bulk (input_ids, multimodal, + # output_ids, attention_mask, position_ids) stays in + # TQ — workers will fetch it via ``train_presharded``. + extras_bdd = read_columns( + policy.dp_client, + meta, + select_fields=["generation_logprobs", "token_mask"], + pad_value_dict=_pad_dict, + ) + generation_logprobs = extras_bdd["generation_logprobs"] + token_mask = extras_bdd["token_mask"] + + # Thin BDD for the data-driven masking call: take + # the slice you need, transform, write delta back. + masking_data = BatchedDataDict[ClippedPGLossDataDict]( + { + "token_mask": token_mask, + "sample_mask": loss_multiplier, + "prev_logprobs": prev_logprobs, + "generation_logprobs": generation_logprobs, + } + ) + + ( + max_seq_mult_prob_error, + num_masked_seqs, + masked_correct_pct, + ) = compute_and_apply_seq_logprob_error_masking( + train_data=masking_data, + rewards=rewards, + seq_logprob_error_threshold=master_config["grpo"][ + "seq_logprob_error_threshold" + ], + ) + # masking may have mutated sample_mask in place — + # capture the post-masking value for delta-write. + sample_mask = masking_data["sample_mask"] + + with timer.time("advantage_calculation"): + print("ā–¶ Computing advantages...", flush=True) + mask = token_mask * sample_mask.unsqueeze(-1) + + # Thin slice-shaped repeated_batch for compute_advantage. + # GRPO and Reinforce++ estimators ignore repeated_batch + # (swallowed via **kwargs); GDPO reads the per-component + # reward keys discovered by get_gdpo_reward_component_keys. + # The actor plumbs those keys into ``slice_data`` so the + # thin BDD here is byte-equivalent to legacy passing the + # full repeated_batch. 
+ rb_for_adv = BatchedDataDict[Any]( + { + "total_reward": rewards, + "baseline": baseline, + "std": std, + } + ) + for k in get_gdpo_reward_component_keys(slice_data): + rb_for_adv[k] = slice_data[k] + advantages = adv_estimator.compute_advantage( + prompt_ids=prompt_ids_for_adv, + rewards=rewards, + mask=mask, + repeated_batch=rb_for_adv, + logprobs_policy=prev_logprobs, + logprobs_reference=reference_policy_logprobs, + ) + del prompt_ids_for_adv + + _log_mixed_rewards_and_advantages_information( + logger=logger, + total_steps=total_steps, + metrics=metrics, + baseline=baseline_for_log, + advantages=advantages, + ) + del baseline_for_log + + # ── Driver delta-write: advantages + (post-masking) + # sample_mask under the same meta.keys so workers fetch + # the union via train_presharded. + write_columns( + policy.dp_client, + meta, + fields={ + "advantages": advantages, + "sample_mask": sample_mask, + }, + ) + + memory_tracker.snapshot_start_of_stage("Policy train", dir()) + print("ā–¶ Preparing for training...", flush=True) + with timer.time("training_prep"): + policy.prepare_for_training() + POLICY_GENERATION_STALE = True + + print("ā–¶ Training policy...", flush=True) + with timer.time("policy_training"): + # Meta-driven train: workers fetch the union of + # rollout + driver-written + worker-written columns + # from TQ, train, return aggregated metrics via Ray. + train_results = policy.train_from_meta( + meta, + loss_fn=loss_fn, + timer=timer, + ) + + if sync_kv_scales: + with timer.time("recompute_kv_scales"): + print( + "ā–¶ Recomputing KV cache scales after policy update...", + flush=True, + ) + # Exclude logprobs, masks, and advantages; multimodal extras pass through. + _calib_fields = [ + f + for f in (meta.fields or []) + if f not in DP_CALIB_EXCLUDED_FIELDS + ] + calibration_data = read_columns( + policy.dp_client, + meta, + select_fields=_calib_fields, + pad_value_dict=_pad_dict, + ) + kv_scales_cache = policy.calibrate_qkv_fp8_scales( + calibration_data, + include_q=True, + )["layers"] + POLICY_GENERATION_STALE = True + + # Stash input_ids and content before kv_clear so the + # late log_data jsonl block can use them. The clear below + # removes meta.keys from TQ, so any post-clear + # read_columns on this meta would fail. ``content`` is a + # decoded object array (list[str]); read_columns decodes + # the NonTensorStack wire field via materialize. 
+ _log_input_ids: Optional[torch.Tensor] = None + _log_content: Optional[np.ndarray] = None + if not _should_log_nemo_gym_responses(master_config): + _log_select = ["input_ids"] + if "content" in (meta.fields or []): + _log_select.append("content") + _log_extras = read_columns( + policy.dp_client, + meta, + select_fields=_log_select, + pad_value_dict=_pad_dict, + ) + _log_input_ids = _log_extras["input_ids"] + _log_content = _log_extras.get("content") + + # ── Step-end TQ cleanup ──────────────────────────────── + policy.dp_client.kv_clear( + keys=meta.keys, + partition_id=meta.partition_id, + ) + + is_last_step = total_steps + 1 >= max_num_steps + if not master_config["data"]["use_multiple_dataloader"]: + is_last_step = is_last_step or ( + (current_epoch + 1 == max_num_epochs) + and (current_step + 1 == len(wrapped_dataloader)) + ) + + if (val_period > 0 and (total_steps + 1) % val_period == 0) or ( + val_at_end and is_last_step + ): + memory_tracker.snapshot_start_of_stage("Validation", dir()) + if NEED_REFIT and POLICY_GENERATION_STALE: + refit_policy_generation( + policy, + policy_generation, + colocated_inference, + kv_scales=kv_scales_cache if sync_kv_scales else None, + ) + POLICY_GENERATION_STALE = False + else: + if colocated_inference: + policy.offload_after_refit() + policy_generation.prepare_for_generation() + val_metrics, validation_timings = validate( + policy_generation, + val_dataloader, + tokenizer, + val_task_to_env, + step=total_steps + 1, + master_config=master_config, + logger=logger, + ) + policy_generation.finish_generation() + logger.log_metrics( + validation_timings, total_steps + 1, prefix="timing/validation" + ) + logger.log_metrics( + val_metrics, total_steps + 1, prefix="validation" + ) + + # advantages and token_mask are in scope from the + # advantage / masking blocks above. No need to re-fetch. + response_advantages = torch.masked_select(advantages, token_mask.bool()) + + memory_tracker.snapshot_start_of_stage("Metrics", dir()) + metrics = { + **metrics, + "loss": train_results["loss"].numpy(), + "grad_norm": train_results["grad_norm"].numpy(), + "reward": rewards.numpy(), + "mean_prompt_length": length.numpy(), + "total_num_tokens": input_lengths.numpy(), + "advantages/mean": torch.mean(response_advantages).detach().item() + if response_advantages.numel() > 0 + else 0.0, + "advantages/max": torch.max(response_advantages).detach().item() + if response_advantages.numel() > 0 + else 0.0, + "advantages/min": torch.min(response_advantages).detach().item() + if response_advantages.numel() > 0 + else 0.0, + **ds_metrics, + } + if "moe_metrics" in train_results: + metrics.update( + {f"moe/{k}": v for k, v in train_results["moe_metrics"].items()} + ) + # Cumulative unfiltered total_reward across all DS iterations + # (sliced to train_prompts_size). Falls back to filtered + # rewards if apply_dynamic_sampling didn't provide it + # (mid-step path). Hoisted once for reuse in metrics, jsonl, + # and the per-step print below. 
+ unfiltered_rewards = ( + unfiltered_rewards_for_logging + if unfiltered_rewards_for_logging is not None + else rewards + ) + if master_config["grpo"]["use_dynamic_sampling"]: + metrics["filtered_reward"] = rewards.numpy() + metrics["reward"] = unfiltered_rewards.numpy() + + metrics.update(train_results["all_mb_metrics"]) + metrics.update(gen_step_metrics) + for k, v in metrics.items(): + if k in {"probs_ratio_min", "probs_ratio_clamped_min"}: + valid_values = [x for x in v if not np.isinf(x)] + metrics[k] = ( + np.min(valid_values).item() if valid_values else -1.0 + ) + elif k in {"probs_ratio_max", "probs_ratio_clamped_max"}: + valid_values = [x for x in v if not np.isinf(x)] + metrics[k] = ( + np.max(valid_values).item() if valid_values else -1.0 + ) + elif k in { + "lr", + "wd", + "reward", + "filtered_reward", + "global_valid_seqs", + "global_valid_toks", + "mean_prompt_length", + }: + metrics[k] = np.mean(v).item() + elif isinstance(v, (np.ndarray, list)): + metrics[k] = np.sum(v).item() + else: + print(f"Skipping aggregation for {k} ({type(v)})") + + metrics.update(rollout_metrics) + metrics["generation_logger_metrics"] = generation_logger_metrics + total_valid_tokens += metrics["global_valid_toks"] + + metrics["max_seq_mult_prob_error"] = max_seq_mult_prob_error + metrics["num_masked_seqs_by_logprob_error"] = num_masked_seqs + metrics["masked_correct_pct"] = masked_correct_pct + + consumed_samples += master_config["grpo"]["num_prompts_per_step"] + timeout.mark_iteration() + + should_save_by_step = ( + is_last_step + or (total_steps + 1) % master_config["checkpointing"]["save_period"] + == 0 + ) + should_save_by_timeout = timeout.check_save() + + memory_tracker.snapshot_start_of_stage("Checkpointing", dir()) + if master_config["checkpointing"]["enabled"] and ( + should_save_by_step or should_save_by_timeout + ): + policy.prepare_for_training() + + grpo_save_state["current_step"] = current_step + 1 + grpo_save_state["total_steps"] = total_steps + 1 + grpo_save_state["current_epoch"] = current_epoch + grpo_save_state["total_valid_tokens"] = total_valid_tokens + if val_metrics is not None: + grpo_save_state["val_reward"] = val_metrics["accuracy"] + elif "val_reward" in grpo_save_state: + del grpo_save_state["val_reward"] + grpo_save_state["consumed_samples"] = consumed_samples + + full_metric_name = master_config["checkpointing"]["metric_name"] + if full_metric_name is not None: + assert full_metric_name.startswith( + "train:" + ) or full_metric_name.startswith("val:"), ( + f"metric_name={full_metric_name} must start with 'val:' or 'train:'" + ) + prefix, metric_name = full_metric_name.split(":", 1) + metrics_source = metrics if prefix == "train" else val_metrics + if not metrics_source: + warnings.warn( + f"You asked to save checkpoints based on {metric_name} but no {prefix} metrics were collected. 
", + stacklevel=2, + ) + if full_metric_name in grpo_save_state: + del grpo_save_state[full_metric_name] + elif metric_name not in metrics_source: + raise ValueError( + f"Metric {metric_name} not found in {prefix} metrics" + ) + else: + grpo_save_state[full_metric_name] = metrics_source[ + metric_name + ] + + with timer.time("checkpointing"): + print( + f"Saving checkpoint for step {total_steps + 1}...", + flush=True, + ) + checkpoint_path = checkpointer.init_tmp_checkpoint( + total_steps + 1, grpo_save_state, master_config + ) + policy.save_checkpoint( + weights_path=os.path.join( + checkpoint_path, "policy", "weights" + ), + optimizer_path=os.path.join( + checkpoint_path, "policy", "optimizer" + ) + if checkpointer.save_optimizer + else None, + tokenizer_path=os.path.join( + checkpoint_path, "policy", "tokenizer" + ), + checkpointing_cfg=master_config["checkpointing"], + ) + if master_config["data"]["use_multiple_dataloader"]: + for ( + task_name, + task_dataloader, + ) in wrapped_dataloader.dataloaders.items(): + torch.save( + task_dataloader.state_dict(), + os.path.join( + checkpoint_path, + f"train_dataloader_{task_name}.pt", + ), + ) + else: + torch.save( + wrapped_dataloader.state_dict(), + os.path.join(checkpoint_path, "train_dataloader.pt"), + ) + checkpointer.finalize_checkpoint(checkpoint_path) + + memory_tracker.snapshot_start_of_stage("Logging", dir()) + # Per-step log_data jsonl. The 1-hop driver holds per-token + # slices it computed against (advantages, sample_mask, + # prev_logprobs, generation_logprobs, token_mask). For + # ``token_ids`` we fetch the small ``input_ids`` column from + # TQ at log time — same data-driven slice pattern as masking + # / KV calibration. + if not _should_log_nemo_gym_responses(master_config): + log_data: dict = {} + if "agent_ref" in repeated_batch: + log_data["agent_ref"] = repeated_batch["agent_ref"] + if master_config["grpo"]["use_dynamic_sampling"]: + # Legacy semantics: ``rewards`` is unfiltered total_reward, + # ``filtered_rewards`` is the kept slice that's trained on. + log_data["rewards"] = unfiltered_rewards.tolist() + log_data["filtered_rewards"] = rewards.tolist() + else: + log_data["rewards"] = rewards.tolist() + log_data["input_lengths"] = input_lengths.tolist() + log_data["token_loss_mask"] = token_mask.tolist() + log_data["sample_loss_mask"] = sample_mask.tolist() + log_data["advantages"] = advantages.tolist() + log_data["generation_logprobs"] = generation_logprobs.tolist() + log_data["prev_logprobs"] = prev_logprobs.tolist() + # input_ids was stashed before the step-end kv_clear (the + # keys are no longer in TQ at this point); ``_log_input_ids`` + # is None when nemo_gym-responses logging path skipped the + # outer ``if not _should_log_nemo_gym_responses`` branch. + if _log_input_ids is not None: + log_data["token_ids"] = _log_input_ids.tolist() + # ``content`` (raw assistant text) is fetched from TQ as + # an object-array column above (stashed before kv_clear). 
+ if _log_content is not None: + log_data["content"] = _log_content.tolist() + logger.log_batched_dict_as_jsonl( + log_data, f"train_data_step{total_steps + 1}.jsonl" + ) + del log_data + + timing_metrics: dict = timer.get_timing_metrics(reduction_op="sum") # type: ignore + if metrics["token_mult_prob_error"] > 1.05: + logger.log_plot_token_mult_prob_error( + { + "prompt_lengths": length, + "full_lengths": input_lengths, + "generation_logprobs": generation_logprobs, + "prev_logprobs": prev_logprobs, + "token_mask": token_mask, + "sample_mask": sample_mask, + }, + total_steps + 1, + name="train/token_mult_prob_error_plot_sample", + ) + if master_config["policy"]["generation"].get("vllm_cfg", {}).get( + "enable_vllm_metrics_logger", False + ) and master_config.get("logger", {}).get("wandb_enabled", False): + log_generation_metrics_to_wandb( + generation_logger_metrics, + total_steps + 1, + master_config["policy"]["generation"]["vllm_cfg"][ + "vllm_metrics_logger_interval" + ], + logger, + ) + + if ( + master_config["policy"]["generation"] + .get("vllm_cfg", {}) + .get("async_engine", False) + ): + for metric_name in metrics.keys(): + if metric_name.startswith("histogram/"): + logger.log_histogram( + metrics[metric_name], + total_steps + 1, + f"generation_metrics/{metric_name}", + ) + + print("\nšŸ“Š Training Results:") + print(f" • Loss: {metrics['loss']:.4f}") + if "draft_loss" in metrics: + print(f" • Draft Loss: {metrics['draft_loss']:.4f}") + print(f" • Generation KL Error: {metrics['gen_kl_error']:.4f}") + if master_config["grpo"]["use_dynamic_sampling"]: + print(f" • Avg Filtered Reward: {np.mean(rewards.numpy()):.4f}") + print( + f" • Avg Total Reward: {np.mean(unfiltered_rewards.numpy()):.4f}" + ) + else: + print(f" • Avg Reward: {np.mean(rewards.numpy()):.4f}") + print( + f" • Mean Generation Length: {metrics_logging_data['mean_gen_tokens_per_sample']:.4f}", + flush=True, + ) + + print("\nā±ļø Timing:", flush=True) + total_time = timing_metrics.get("total_step_time", 0) + + number_of_samples_per_step = ( + master_config["grpo"]["num_prompts_per_step"] + * master_config["grpo"]["num_generations_per_prompt"] + ) + total_num_gpus = ( + master_config["cluster"]["num_nodes"] + * master_config["cluster"]["gpus_per_node"] + ) + + print(f" • Total step time: {total_time:.2f}s", flush=True) + + for k, v in sorted( + timing_metrics.items(), key=lambda item: item[1], reverse=True + ): + if k != "total_step_time": + percent = (v / total_time * 100) if total_time > 0 else 0 + print(f" • {k}: {v:.2f}s ({percent:.1f}%)", flush=True) + + timing_metrics["valid_tokens_per_sec_per_gpu"] = ( + metrics["global_valid_toks"] / total_time / total_num_gpus + ) + performance_metrics = print_performance_metrics( + train_results, metrics, timing_metrics, master_config + ) + + logger.log_metrics(metrics, total_steps + 1, prefix="train") + logger.log_metrics( + performance_metrics, total_steps + 1, prefix="performance" + ) + logger.log_metrics( + timing_metrics, + total_steps + 1, + prefix="timing/train", + step_finished=True, + ) + + dynamic_sampling_num_gen_batches = 0 + + memory_tracker.snapshot_start_of_stage("After CPU memory clear", dir()) + + del repeated_batch + del rewards + del metrics + if "val_metrics" in dir(): + del val_metrics + + timer.reset() + current_step += 1 + total_steps += 1 + if should_save_by_timeout: + memory_tracker.snapshot_start_of_stage("", dir()) + print("Timeout has been reached, stopping training early", flush=True) + return + if total_steps >= max_num_steps: + 
memory_tracker.snapshot_start_of_stage("", dir()) + print( + "Max number of steps has been reached, stopping training early", + flush=True, + ) + return + + current_epoch += 1 + current_step = 0 diff --git a/nemo_rl/algorithms/reward_functions.py b/nemo_rl/algorithms/reward_functions.py index 87c826db26..4966ce4b12 100644 --- a/nemo_rl/algorithms/reward_functions.py +++ b/nemo_rl/algorithms/reward_functions.py @@ -130,22 +130,34 @@ def apply_reward_shaping( # Calculate the expected response length expected_response_length = max_response_length - overlong_buffer_length - assert len(batch["message_log"]) == len(rewards), ( + # Prefer slim per-sample tensor (data-plane path: message_log lives in + # TQ, slice carries response_token_lengths). Fall back to scanning + # message_log for the legacy non-data-plane caller. + response_token_lengths = batch.get("response_token_lengths") + if response_token_lengths is not None: + if isinstance(response_token_lengths, torch.Tensor): + response_lengths = response_token_lengths.tolist() + else: + response_lengths = list(response_token_lengths) + else: + response_lengths = [] + for message_log in batch["message_log"]: + length = None + for message in message_log: + if message["role"] == "assistant": + length = message["token_ids"].shape[0] + break + assert length is not None, ( + "Assistant response not found during reward shaping" + ) + response_lengths.append(length) + + assert len(response_lengths) == len(rewards), ( "The number of messages in the batch must match the number of rewards" ) updated_rewards = torch.zeros_like(rewards) - for i, message_log in enumerate(batch["message_log"]): - # Get the assistant response length (index 1 is the assistant response) - message_response_length = None - for message in message_log: - if message["role"] == "assistant": - message_response_length = message["token_ids"].shape[0] - break - assert message_response_length is not None, ( - "Assistant response not found during reward shaping" - ) - + for i, message_response_length in enumerate(response_lengths): # Calculate the exceed length and the corresponding reward penalty exceed_length = message_response_length - expected_response_length overlong_reward = min( diff --git a/nemo_rl/data/llm_message_utils.py b/nemo_rl/data/llm_message_utils.py index 32bac1e923..f19aade0f0 100644 --- a/nemo_rl/data/llm_message_utils.py +++ b/nemo_rl/data/llm_message_utils.py @@ -14,6 +14,7 @@ import warnings from typing import Any, Optional, Union, cast +import numpy as np import torch from datasets import Dataset from transformers.tokenization_utils_base import PreTrainedTokenizerBase @@ -687,3 +688,138 @@ def remap_dataset_keys( lambda x: {v: x[k] for k, v in mapping_dict.items()}, remove_columns=list(mapping_dict.keys()), ) + + +# ── Decomposed wire format for `message_log` ────────────────────────── +# +# `message_log` mixes torch.Tensor with Python objects at the per-row +# level (`{"role": str, "content": str, "token_ids": Tensor, ...}` per +# turn). Shipping that shape per-row through pickle serializes the +# *underlying storage* of view-aliased tensor slices — for a vllm batched +# output arena that's ~100 MB per row instead of the slice's ~10 KB. +# +# The helpers below split `message_log` into per-field arrays at the +# wire boundary (token tensors flat in `bulk_batch`, role/content +# strings as object arrays, per-turn lengths as one slim tensor) and +# rebuild the list-of-dicts shape on the consumer from local-arena +# views. No tensor ever reaches per-row pickle. 
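+#
+# Illustrative shapes (not real data): a 2-row batch where row 0 holds
+# turns [user(5 tokens), assistant(12 tokens)] and row 1 holds a single
+# user(3 tokens) turn decomposes into
+#
+#   turn_lengths           -> tensor([[5, 12], [3, 0]])
+#   turn_roles             -> object array [["user", "assistant"], ["user"]]
+#   turn_contents          -> object array of the raw content strings
+#   response_token_lengths -> tensor([12, 0])  # row 1 has no assistant turn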
+ +# Fields ridden by `bulk_batch` and consumed by +# :func:`reconstruct_message_log` to rebuild the list-of-dicts view. +MESSAGE_LOG_BULK_FIELDS = ("turn_lengths", "turn_roles", "turn_contents") +# Slim per-sample field carried alongside the slice (not the bulk wire); +# consumed by :func:`apply_reward_shaping` on the driver. +MESSAGE_LOG_SLICE_FIELD = "response_token_lengths" + + +def decompose_message_log( + message_log_batch: list[LLMMessageLogType], +) -> dict[str, Any]: + """Split a list-of-lists-of-dicts ``message_log`` into per-field arrays. + + Returns a dict with: + + - ``turn_lengths`` — ``torch.LongTensor(B, max_turns)``, zero in unused slots. + - ``turn_roles`` — ``np.ndarray(object, (B,))`` of ``list[str]``. + - ``turn_contents`` — ``np.ndarray(object, (B,))`` of ``list[str]``. + - ``response_token_lengths`` — ``torch.LongTensor(B,)``, assistant-turn + length per sample (0 if no assistant turn). Consumed by + :func:`nemo_rl.algorithms.reward_functions.apply_reward_shaping`. + """ + batch_size = len(message_log_batch) + max_turns = max((len(ml) for ml in message_log_batch), default=0) + + turn_roles = np.empty(batch_size, dtype=object) + turn_contents = np.empty(batch_size, dtype=object) + # Build Python lists in the hot loop; one tensor allocation at the end + # avoids per-turn 0-d tensor writes inside the loop. + turn_lengths_lol: list[list[int]] = [[0] * max_turns for _ in range(batch_size)] + response_lengths: list[int] = [0] * batch_size + + for i, ml in enumerate(message_log_batch): + roles: list[str] = [] + contents: list[str] = [] + lengths_i = turn_lengths_lol[i] + for t, m in enumerate(ml): + role = m["role"] # required; surface bad data loudly here + roles.append(role) + contents.append(m.get("content", "")) + tok = m.get("token_ids") + if tok is None: + continue + length = int(tok.shape[0]) if isinstance(tok, torch.Tensor) else len(tok) + lengths_i[t] = length + if role == "assistant" and response_lengths[i] == 0: + response_lengths[i] = length + turn_roles[i] = roles + turn_contents[i] = contents + + return { + "turn_lengths": torch.tensor(turn_lengths_lol, dtype=torch.long), + "turn_roles": turn_roles, + "turn_contents": turn_contents, + "response_token_lengths": torch.tensor(response_lengths, dtype=torch.long), + } + + +def attach_message_log_view(batch: BatchedDataDict[Any]) -> None: + """Attach ``batch['message_log']`` in place if decomposed fields are present. + + Rebuilds ``message_log`` as views into the consumer-local ``input_ids`` + / ``generation_logprobs``. Aliasing is harmless because the local + tensors own their storage and consumers do not re-pickle ``message_log``. + No-op when the decomposed fields are absent (legacy pickle-shipped path). + """ + if "input_ids" not in batch or any(k not in batch for k in MESSAGE_LOG_BULK_FIELDS): + return + batch["message_log"] = reconstruct_message_log( + input_ids=batch["input_ids"], + turn_lengths=batch["turn_lengths"], + turn_roles=batch["turn_roles"], + turn_contents=batch["turn_contents"], + generation_logprobs=batch.get("generation_logprobs"), + ) + + +def reconstruct_message_log( + input_ids: Tensor, + turn_lengths: Tensor, + turn_roles: "np.ndarray", + turn_contents: "np.ndarray", + generation_logprobs: Optional[Tensor] = None, +) -> list[LLMMessageLogType]: + """Inverse of :func:`decompose_message_log`. + + Per-turn ``token_ids`` and ``generation_logprobs`` are **views** into + the consumer-local ``input_ids`` / ``generation_logprobs`` tensors. 
+ The aliasing is harmless because the local tensors own their storage + (decoded from the wire) and consumers do not re-pickle ``message_log``. + """ + batch_size = int(input_ids.shape[0]) + # Single host-side materialization — avoids a per-turn .item() sync. + turn_lengths_list = turn_lengths.tolist() + out: list[LLMMessageLogType] = [] + for i in range(batch_size): + roles_i = turn_roles[i] + contents_i = turn_contents[i] + lengths_i = turn_lengths_list[i] + turns: LLMMessageLogType = [] + offset = 0 + for t, role in enumerate(roles_i): + length = lengths_i[t] + if length == 0: + turns.append({"role": role, "content": contents_i[t]}) + continue + turn: dict[str, Any] = { + "role": role, + "content": contents_i[t], + "token_ids": input_ids[i, offset : offset + length], + } + if generation_logprobs is not None and role == "assistant": + turn["generation_logprobs"] = generation_logprobs[ + i, offset : offset + length + ] + offset += length + turns.append(turn) + out.append(turns) + return out diff --git a/nemo_rl/data_plane/README.md b/nemo_rl/data_plane/README.md new file mode 100644 index 0000000000..4bee3bfd86 --- /dev/null +++ b/nemo_rl/data_plane/README.md @@ -0,0 +1,348 @@ +# nemo_rl.data_plane + +Stable boundary between NeMo-RL and any data-plane implementation +(currently `transfer_queue`; future: `nv-dataplane`). All call sites in +`nemo_rl/algorithms`, `nemo_rl/experience` and `nemo_rl/models` go +through `DataPlaneClient` — never `import transfer_queue` directly. +That's the swappable boundary. + +This README is the canonical reference: quickstart for users, runtime +view for anyone touching `nemo_rl/algorithms/grpo_sync.py`, +`nemo_rl/experience/sync_rollout_actor.py`, or `nemo_rl/data_plane/`. + +## Install + +`tensordict` and `TransferQueue==0.1.6` are base dependencies of +nemo-rl — `uv sync` (or `pip install -e .`) is enough; there is no +`[data-plane]` extra to remember. Worker venvs (built per-backend by +`nemo_rl.utils.venvs.create_local_venv` via bare `uv sync`) pick them up +automatically too, so the TQ adapter works on every worker class +(FSDP2, DTensor, mcore, automodel) without per-extra plumbing. + +## Quickstart + +```python +from tensordict import TensorDict +import torch + +from nemo_rl.data_plane import build_data_plane_client + +client = build_data_plane_client({ + "enabled": True, + "impl": "transfer_queue", + "backend": "simple", # or "mooncake_cpu" + "storage_capacity": 1_000_000, + "num_storage_units": 2, +}) + +client.register_partition( + partition_id="train", + fields=["input_ids", "advantages"], + num_samples=1024, + consumer_tasks=["prev_lp", "ref_lp", "train"], +) + +# Producer (rollout, ref policy, …) — sync put. Use ``async_kv_batch_put`` +# only when composing with an existing event loop (e.g. async rollout +# actor). +client.kv_batch_put( + keys=["uid-0", "uid-1"], + partition_id="train", + fields=TensorDict({"input_ids": torch.zeros(2, 128, dtype=torch.long)}, + batch_size=[2]), +) + +# Consumer — task-mediated discovery + claim (advances per-task cursor). +meta = client.claim_meta( + partition_id="train", + task_name="train", + required_fields=["input_ids", "advantages"], + batch_size=64, +) +batch = client.get_data(meta) # TensorDict +``` + +## When `enabled=False` + +The factory raises — there is intentionally no NoOp prod fallback. +Use the legacy `nemo_rl.algorithms.grpo.grpo_train` trainer for that +case (it never engages the data plane). 
The TQ-mediated trainer lives +at `nemo_rl.algorithms.grpo_sync.grpo_train_sync` and assumes +`enabled=True`. + +`NoOpDataPlaneClient` exists in `adapters/noop.py` purely as a test +fixture for the ABC contract tests — production callers must not import +it. + +## Hard rules + +These are checked at the adapter; violating them is a `TypeError`, not +a warning. + +* **No Python leaves on the bus.** `kv_batch_put(fields=...)` must be a + `TensorDict` of tensors. Use `tags=` for primitives, the Ray object + store for arbitrary Python objects. +* **`select_fields` is required on read.** `get_data` raises if neither + `select_fields` nor `meta.fields` is set — silently fetching the full + sample record is not allowed. + +--- + +## The API surface + +Everything goes through `DataPlaneClient` +(`nemo_rl/data_plane/interfaces.py`). Eight methods, three groups. + +### Lifecycle + +- `register_partition(partition_id, fields, num_samples, consumer_tasks, ...)` + declares the partition schema and which consumer tasks read from it. +- `close()` releases controller / storage handles. + +### Task-mediated (consumer-counter aware) + +- `claim_meta(partition_id, task_name, required_fields, batch_size) → KVBatchMeta` + discovers and claims samples ready for `task_name`; advances TQ's + per-task consumption cursor as a side effect. +- `get_data(meta, select_fields) → TensorDict` resolves a meta to data. +- `check_consumption_status(...) → bool`. + +### Direct-by-key (the hot path in sync 1-hop) + +- `kv_batch_put(keys, partition_id, fields)` — producer entrypoint; + flips `production_status[sample, field] = 1` as a side effect. +- `kv_batch_get(keys, partition_id, select_fields) → TensorDict` — direct fetch. +- `kv_clear(keys, partition_id)` — drop. + +### Helpers built on top (`nemo_rl/data_plane/`) + +- `kv_first_write(batch, uids, ...) → KVBatchMeta` — single flat + `kv_batch_put` of all rollout fields. +- `read_columns(client, meta, select)` — `kv_batch_get → materialize`. +- `write_columns(client, meta, fields)` — typed `kv_batch_put` for deltas. +- `shard_meta_for_dp(meta, dp_world)` — pure metadata split, no I/O, + no key remint. +- `meta.subset(idxs)` / `meta.slice(start, stop)` / `meta.concat(other)` — + pure metadata transforms (methods on `KVBatchMeta`; used by + dynamic_sampling). + +## Per-sample key invariant + +Mint **once** at rollout, reuse forever: + +``` + uid = "step17_prompt_42" # opaque, from driver dataset iter + key_i = f"{uid}_g{i}" # one per generation, i ∈ [0, n_gen) +``` + +Every `kv_batch_put` / `kv_batch_get` for that sample uses the same key. +Worker write-backs append columns; nothing remints. 
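+
+A minimal sketch of the invariant, reusing the `client` and the
+`"train"` partition from the Quickstart above (key count and tensor
+shapes are illustrative only):
+
+```python
+import torch
+from tensordict import TensorDict
+
+uid = "step17_prompt_42"                      # minted once at rollout
+keys = [f"{uid}_g{i}" for i in range(4)]      # one key per generation
+
+# Producer first-write: rollout bulk lands under those keys.
+client.kv_batch_put(
+    keys=keys,
+    partition_id="train",
+    fields=TensorDict(
+        {"input_ids": torch.zeros(4, 128, dtype=torch.long)}, batch_size=[4]
+    ),
+)
+
+# Later delta-write: a new column appended under the SAME keys — no remint.
+client.kv_batch_put(
+    keys=keys,
+    partition_id="train",
+    fields=TensorDict({"advantages": torch.zeros(4, 128)}, batch_size=[4]),
+)
+
+# Consumer fetch: select only the columns this stage needs.
+td = client.kv_batch_get(
+    keys=keys,
+    partition_id="train",
+    select_fields=["input_ids", "advantages"],
+)
+```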
+ +## E2E lifecycle for one GRPO step + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ DRIVER (grpo_sync.py) ─────────────────────────────┐ +│ │ +│ ā‘  register_partition(pid="step17", fields=[input_ids, ..., advantages, ...], │ +│ num_samples=N*G, consumer_tasks=["lp","ref","train"]) │ +│ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ spawns + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ SyncRolloutActor (Ray @remote) ───────────────────────────────────┐ +│ vllm.generate → flatten → mask → prompt extract │ +│ ā‘” kv_batch_put( keys=[uid_g0..uid_gN-1], │ +│ fields=TensorDict({input_ids, gen_logprobs, token_mask, ...})) │ +│ returns meta → driver │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”˜ + │ + ā”Œā”€ DRIVER ─────────────────────────────────────────────────┐ │ + │ ā‘¢ shard_meta_for_dp(meta, dp_world=8) → [mā‚€..m₇] ā”‚ā—„ā”€ā”€ā”€ā”˜ + │ (pure metadata, no I/O, no key remint) │ + ā””ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ Ray-call per DP rank with mįµ¢ + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ MegatronPolicyWorker[rank=i] (Ɨ8) ─────────────────────────────────┐ +│ ā‘£ kv_batch_get(keys=mįµ¢.keys, select=[input_ids, token_mask, ...]) │ +│ forward → prev_logprobs │ +│ ⑤ leader-only: kv_batch_put(keys=mįµ¢.keys, fields={prev_logprobs:T}) ── PHASE 1│ +│ │ +│ ā‘„ kv_batch_get(...) → ref_logprobs │ +│ ⑦ leader-only: kv_batch_put({reference_policy_logprobs:T}) ── PHASE 2│ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”˜ + │ + ā”Œā”€ DRIVER (small slice work, never bulk) ──────────────────┐ │ + │ ā‘§ read_columns(meta, select=[token_logprobs, rewards]) ā”‚ā—„ā”€ā”€ā”€ā”˜ + │ compute advantages (vectorized, on driver, tiny) │ + │ ⑨ write_columns(meta, {advantages: T}) │ + │ │ + │ [optional] dynamic_sampling: meta.subset(...) 
│ + │ [optional] kv_clear(dropped_keys) │ + ā””ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ shard_meta_for_dp again, Ray-call per rank + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ MegatronPolicyWorker[rank=i] (Ɨ8) ─────────────────────────────────┐ +│ ā‘© kv_batch_get(select=[input_ids, prev_logprobs, ref_lp, advantages, masks]) │ +│ loss → grad → optimizer.step() │ +│ (no write-back: training is terminal for this partition) │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”˜ + │ + ā”Œā”€ DRIVER (step-end housekeeping) ─────────────────────────┐ │ + │ ⑪ kv_batch_get(select=[input_ids]) ← stash for log_data ā”‚ā—„ā”€ā”€ā”€ā”˜ + │ ā‘« kv_clear(keys=meta.keys, partition_id=pid) │ + ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + + (next step → ā‘  again with a fresh partition_id) +``` + +Mental model: **TQ is the bus, not a database.** It holds bulk between +stages of one step, then `kv_clear` drops it. Driver only handles small +per-sample slices; workers handle bulk via TQ. + +## Call counts per step + +Steady state on the validation run (32 samples, 8 GPUs, no PP/TP): + +| TQ call | Site | Count / step | Payload | +|----------------------------|---------------------|-------------:|-----------------------------------| +| `register_partition` | driver | 1 | metadata only | +| `kv_batch_put` (rollout) | SyncRolloutActor | 1 | full bulk (~600 KB; GBs at scale) | +| `shard_meta_for_dp` | driver | 3 | no I/O | +| `kv_batch_get` (lp inputs) | workers | 8 (per DP) | input slice | +| `kv_batch_put` (lp out) | workers (leader) | 1 | prev_logprobs delta | +| `kv_batch_get` (ref input) | workers | 8 | input slice | +| `kv_batch_put` (ref out) | workers (leader) | 1 | ref_logprobs delta | +| `kv_batch_get` (adv slice) | driver | 1 | small (rewards + token_lp) | +| `kv_batch_put` (advantages)| driver | 1 | small delta | +| `kv_batch_get` (train) | workers | 8 | full slice | +| `kv_batch_get` (log_data) | driver | 1 | input_ids only | +| `kv_clear` | driver | 1 | drop | + +Total: ~32 TQ RPCs / step (excluding `shard_meta_for_dp`, which is +no-I/O). 24 of those are the per-DP fetch fan-out (3 phases Ɨ 8 ranks). + +## Concrete examples + +**Rollout produces (only first-write):** +```python +meta = kv_first_write( + final_batch_cpu=batch, + uids=[f"step{step}_p{i}" for i in range(num_prompts)], + dp_client=policy.dp_client, + partition_id=f"grpo_step_{step}", +) +# meta.keys = ["step17_p0_g0", "step17_p0_g1", ..., "step17_p7_g3"] +# meta.fields = ["input_ids", "input_lengths", "generation_logprobs", +# "token_mask", "sample_mask", ...] 
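+# meta.sequence_lengths = per-row input_lengths (used later by
+#                         shard_meta_for_dp for driver-side DP balancing)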
+``` + +**Driver appends a column (small delta, no bulk):** +```python +slice_ = read_columns(client, meta, select_fields=["token_logprobs", "rewards"]) +advantages = compute_advantages(slice_) # tiny driver compute +write_columns(client, meta, {"advantages": advantages}) +``` + +**Worker fan-out (driver):** +```python +shards, _ = shard_meta_for_dp(meta, dp_world=8) +ray.get([ + worker[i].train_from_meta.remote(shards[i]) + for i in range(8) +]) +``` + +**Worker fetch + leader write-back (in `worker_mixin._write_back`):** +```python +inputs = read_columns(self._dp_client, meta, select_fields=LP_SEED_FIELDS) +prev_lp = self.forward(inputs) +if self._is_replica_leader(): + write_columns(self._dp_client, meta, {"prev_logprobs": prev_lp}) +``` + +**Step-end teardown:** +```python +log_input_ids = read_columns(client, meta, select_fields=["input_ids"]) +client.kv_clear(keys=meta.keys, partition_id=meta.partition_id) +``` + +## Performance characterization + +End-to-end parity vs the legacy driver-bulk path on the toy validation +run: + +- Steps 1–7 are bit-exact (loss + reward); divergence afterward is the + expected stochastic drift from accumulated policy updates. +- Steady-state step time: **+0.21 s** (1-hop 7.86 s vs legacy 7.65 s, + ~3 %). + +Per-phase breakdown (steady state, steps 2–19): + +| Phase | v4 (1-hop) | Legacy | Ī” | +|-------------------------------|-----------:|---------:|-----------:| +| Total step time | 7.606 s | 7.393 s | **+0.213 s** | +| policy_training | 0.596 s | 0.567 s | +0.028 s | +| generation | 1.502 s | 1.528 s | āˆ’0.027 s | +| policy_and_ref_logprob | 1.588 s | 1.448 s | **+0.141 s** | +| residual (driver bookkeeping) | 3.920 s | 3.850 s | +0.070 s | + +**The +0.21 s overhead is entirely TQ RPC roundtrip cost in the +logprob phase** (two worker calls Ɨ one fetch + one write each). +Generation and training are unchanged. + +### Crossover scale (where TQ wins) + +TQ overhead is mostly latency-bound (~constant per step), while legacy +driver fan-out is bandwidth-bound (scales with batch tensor volume Ɨ +DP fan-out). Mental model: + +- Legacy driver overhead ā‰ˆ ~5 ms/MB Ɨ (4 full-batch transfers per step) + Ɨ DP-fan-out +- TQ overhead ā‰ˆ ~200 ms fixed (after fuse-and-overlap optimization: + ~100 ms) + +| Scale | Batch / step | DP ranks | Legacy cost | Winner | +|------------------------------------------|-------------:|---------:|------------:|-------------------------| +| Toy (this run, 1B, 512 tok, BS 32) | 0.6 MB | 8 | ~50 ms | **legacy +0.21 s** | +| Small prod (8B, 1k tok, BS 256) | ~10 MB | 8 | ~300 ms | **roughly tied** | +| Mid prod (70B, 4k tok, BS 1024) | ~250 MB | 32 | ~5–10 s | **TQ wins decisively** | +| Long-context (8k–32k seq, GRPO 16 gens) | 1–5 GB | 64+ | tens of s | **TQ wins decisively** | + +Rough crossover: **~10 MB / step / DP-rank of effective batch volume**. +Long sequences, more generations per prompt, and more DP ranks all +push the needle hard toward TQ. + +### Cheapest optimizations (deferred) + +1. **Fuse `get_logprobs` + `get_reference_policy_logprobs` into one + worker call** — saves ~70 ms (one TQ input-fetch). Brings overhead + from +0.21 s → ~+0.14 s. +2. **Overlap TQ write-back with next-phase fetch** — saves another + ~30–50 ms. Combined: ~+0.10 s overhead, effectively at parity. + +Both are clean refactors inside `tq_policy.py` / +`worker_mixin.py` and don't touch `grpo_sync.py`. Not on the +critical path; flag for the next data-plane optimization round. 
+ +## Where to look in the code + +| Concern | File | +|----------------------------------|----------------------------------------------------------------------| +| Stable boundary | `nemo_rl/data_plane/interfaces.py` | +| Adapter (TransferQueue impl) | `nemo_rl/data_plane/adapters/transfer_queue.py` | +| Column helpers above DP client | `nemo_rl/data_plane/column_io.py` (`read_columns`, `write_columns`) | +| First-write helper + rollout actor | `nemo_rl/experience/sync_rollout_actor.py` | +| DP-rank meta sharding | `nemo_rl/data_plane/preshard.py` | +| Worker fetch + write-back | `nemo_rl/data_plane/worker_mixin.py` | +| TQ-aware policy facade | `nemo_rl/models/policy/tq_policy.py` | +| End-to-end orchestration | `nemo_rl/algorithms/grpo_sync.py` | +| Unit tests | `tests/data_plane/unit/` | + +## Operational assumptions + +* One Ray cluster per experiment. The TQ controller is a globally + named Ray actor; running two trainers in the same cluster will + collide. +* Storage capacity sizing rule of thumb: + `storage_capacity ≄ 2 Ɨ num_prompts Ɨ n_gens Ɨ max_seq_len Ɨ + bytes_per_token Ɨ num_active_fields`. diff --git a/nemo_rl/data_plane/__init__.py b/nemo_rl/data_plane/__init__.py new file mode 100644 index 0000000000..56b19178a1 --- /dev/null +++ b/nemo_rl/data_plane/__init__.py @@ -0,0 +1,38 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""NeMo-RL data-plane package. + +The public surface is intentionally tiny: an ABC, a meta dataclass, a +config TypedDict, and a factory. Everything else is an implementation +detail of a specific adapter. +""" + +from nemo_rl.data_plane.codec import materialize +from nemo_rl.data_plane.factory import build_data_plane_client +from nemo_rl.data_plane.interfaces import ( + DataPlaneClient, + DataPlaneConfig, + KVBatchMeta, +) +from nemo_rl.data_plane.observability import MetricsDataPlaneClient, log_event + +__all__ = [ + "DataPlaneClient", + "DataPlaneConfig", + "KVBatchMeta", + "MetricsDataPlaneClient", + "build_data_plane_client", + "log_event", + "materialize", +] diff --git a/nemo_rl/data_plane/adapters/__init__.py b/nemo_rl/data_plane/adapters/__init__.py new file mode 100644 index 0000000000..341a77c5bc --- /dev/null +++ b/nemo_rl/data_plane/adapters/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/nemo_rl/data_plane/adapters/noop.py b/nemo_rl/data_plane/adapters/noop.py new file mode 100644 index 0000000000..89e2a51010 --- /dev/null +++ b/nemo_rl/data_plane/adapters/noop.py @@ -0,0 +1,243 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""In-memory ``DataPlaneClient`` test fixture. + +Behaves like a real adapter end-to-end (put → get → clear, consumption +counters, field-presence as the stage-done signal) but stores everything +in process memory. The ABC contract tests run against this implementation +so they don't require TQ installed. + +Production callers must NOT use this — :func:`build_data_plane_client` +intentionally raises when ``enabled=False`` rather than returning a NoOp +fallback (see ``factory.py``). +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any + +import torch +from tensordict import TensorDict + +from nemo_rl.data_plane.codec import stack_or_nest as _stack_or_nest +from nemo_rl.data_plane.interfaces import DataPlaneClient, KVBatchMeta + + +def _reject_non_tensor_leaves(td: TensorDict) -> None: + """No pickle on the bus. Mirror of the TQ adapter check. + + Walk the leaves via ``keys()`` + indexed lookup rather than + ``items()``, because some tensordict versions skip ``NonTensorData`` + entries from ``items(leaves_only=True)`` — they're "leaves" by + structure but not tensor-typed, so they'd silently slip past a + naive items() iteration. + """ + bad = [] + for k in td.keys(include_nested=True, leaves_only=True): + v = td.get(k) + if not isinstance(v, torch.Tensor): + bad.append(k) + if bad: + raise TypeError( + f"kv_batch_put received non-tensor leaves: {bad}. " + "Tensorize via codec helpers, use `tags=` for primitives, " + "or use the Ray object store for arbitrary Python objects." 
+ ) + + +@dataclass +class _Partition: + fields: list[str] + num_samples: int + consumer_tasks: list[str] + grpo_group_size: int | None + enums: dict[str, list[str]] + rows: dict[str, dict[str, torch.Tensor]] = field(default_factory=dict) + tags: dict[str, dict[str, Any]] = field(default_factory=dict) + # per-task set of keys already returned by claim_meta (TQ ``mode='fetch'``) + consumed: dict[str, set[str]] = field(default_factory=dict) + + +class NoOpDataPlaneClient(DataPlaneClient): + """Reference in-memory implementation.""" + + def __init__(self) -> None: + self._partitions: dict[str, _Partition] = {} + self._closed = False + + def register_partition( + self, + partition_id: str, + fields: list[str], + num_samples: int, + consumer_tasks: list[str], + grpo_group_size: int | None = None, + enums: dict[str, list[str]] | None = None, + ) -> None: + self._partitions[partition_id] = _Partition( + fields=list(fields), + num_samples=int(num_samples), + consumer_tasks=list(consumer_tasks), + grpo_group_size=grpo_group_size, + enums=dict(enums) if enums else {}, + consumed={t: set() for t in consumer_tasks}, + ) + + def claim_meta( + self, + partition_id: str, + task_name: str, + required_fields: list[str], + batch_size: int, + dp_rank: int | None = None, + blocking: bool = True, + timeout_s: float = 60.0, + ) -> KVBatchMeta: + del blocking, timeout_s, dp_rank # NoOp is single-process + rec = self._partitions[partition_id] + if task_name not in rec.consumed: + raise KeyError( + f"task {task_name!r} not registered as a consumer of " + f"partition {partition_id!r}" + ) + + ready: list[str] = [] + seqs: list[int] = [] + for key, row in rec.rows.items(): + if key in rec.consumed[task_name]: + continue + if not all(f in row for f in required_fields): + continue + ready.append(key) + tag = rec.tags.get(key, {}) + seqs.append(int(tag.get("input_lengths", 0))) + if len(ready) >= batch_size: + break + + rec.consumed[task_name].update(ready) + return KVBatchMeta( + partition_id=partition_id, + task_name=task_name, + keys=ready, + fields=list(required_fields), + sequence_lengths=seqs if any(seqs) else None, + ) + + def get_data( + self, + meta: KVBatchMeta, + select_fields: list[str] | None = None, + ) -> TensorDict: + fields = select_fields if select_fields is not None else meta.fields + if fields is None: + raise ValueError( + "get_data requires either select_fields or meta.fields; " + "fetching all fields silently is forbidden." + ) + return self.kv_batch_get(meta.keys, meta.partition_id, list(fields)) + + def check_consumption_status( + self, partition_id: str, task_names: list[str] + ) -> bool: + rec = self._partitions[partition_id] + for t in task_names: + if t not in rec.consumed: + return False + if len(rec.consumed[t]) < len(rec.rows): + return False + return True + + def kv_batch_put( + self, + keys: list[str], + partition_id: str, + fields: TensorDict | None = None, + tags: list[dict[str, Any]] | None = None, + ) -> KVBatchMeta: + rec = self._partitions[partition_id] + if fields is not None: + _reject_non_tensor_leaves(fields) + for i, key in enumerate(keys): + row = rec.rows.setdefault(key, {}) + for fname in fields.keys(): + val = fields[fname][i] + # Defense in depth — _reject_non_tensor_leaves can + # miss NonTensorData entries depending on the + # tensordict version's iteration semantics. + if not isinstance(val, torch.Tensor): + raise TypeError( + f"kv_batch_put received non-tensor leaf " + f"{fname!r}: {type(val).__name__}. 
" + "Tensorize via codec helpers, use `tags=` " + "for primitives, or use the Ray object store " + "for arbitrary Python objects." + ) + row[fname] = val.detach().clone() + if tags is not None: + for key, tag in zip(keys, tags): + rec.tags.setdefault(key, {}).update(tag) + return KVBatchMeta( + partition_id=partition_id, + task_name=None, + keys=list(keys), + fields=list(fields.keys()) if fields is not None else None, + ) + + def kv_batch_get( + self, + keys: list[str], + partition_id: str, + select_fields: list[str], + ) -> TensorDict: + rec = self._partitions[partition_id] + if not keys: + return TensorDict({}, batch_size=(0,)) + + out: dict[str, list[torch.Tensor]] = {f: [] for f in select_fields} + for key in keys: + row = rec.rows[key] + for f in select_fields: + if f not in row: + raise KeyError( + f"field {f!r} not yet produced for key {key!r} " + f"in partition {partition_id!r}" + ) + out[f].append(row[f]) + + stacked = {f: _stack_or_nest(out[f]) for f in select_fields} + return TensorDict(stacked, batch_size=(len(keys),)) + + def kv_clear(self, keys: list[str] | None, partition_id: str) -> None: + rec = self._partitions.get(partition_id) + if rec is None: + return + if keys is None: + rec.rows.clear() + rec.tags.clear() + for s in rec.consumed.values(): + s.clear() + self._partitions.pop(partition_id, None) + return + for key in keys: + rec.rows.pop(key, None) + rec.tags.pop(key, None) + for s in rec.consumed.values(): + s.discard(key) + + def close(self) -> None: + if self._closed: + return + self._partitions.clear() + self._closed = True diff --git a/nemo_rl/data_plane/adapters/transfer_queue.py b/nemo_rl/data_plane/adapters/transfer_queue.py new file mode 100644 index 0000000000..d20629a377 --- /dev/null +++ b/nemo_rl/data_plane/adapters/transfer_queue.py @@ -0,0 +1,627 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Adapter wiring :class:`DataPlaneClient` onto the ``transfer_queue`` package. + +Pure plumbing — it owns the TQ controller / client handle and translates +:class:`KVBatchMeta` ↔ TQ's own ``BatchMeta`` / ``KVBatchMeta``. No +business logic. Backend init is lifted from +``rl-arena/arena/backends.py``; the call shapes are lifted from +``rl-arena/arena/dataplane_client.py``. +""" + +from __future__ import annotations + +import ipaddress +import os +import socket +import subprocess +import time +from dataclasses import dataclass, field +from importlib import resources +from typing import Any + +import torch +from tensordict import TensorDict + +from nemo_rl.data_plane.interfaces import ( + DataPlaneClient, + DataPlaneConfig, + KVBatchMeta, +) + +# ────────────────────────────────────────────────────────────────────────── +# Lazy import of transfer_queue — keeps NeMo-RL importable without TQ +# installed; failure is deferred to construction time. 
+# ────────────────────────────────────────────────────────────────────────── + + +def _tq(): # pragma: no cover - trivially exercised by smoke tests + try: + import transfer_queue as tq + except ImportError as e: # noqa: F841 + raise ImportError( + "transfer_queue is not installed. It is a base dependency of " + "nemo-rl — try `uv sync` to refresh, or `pip install " + "TransferQueue==0.1.6` if you're not using uv." + ) from e + return tq + + +# ────────────────────────────────────────────────────────────────────────── +# Backend init — lifted from rl-arena/arena/backends.py. +# ────────────────────────────────────────────────────────────────────────── + + +def _get_local_node_ip() -> str: + """Return THIS process's host IP, not the cluster head's. + + Each Ray actor process must use its own node's IP so Mooncake's + announce address (``MC_TCP_BIND_ADDRESS`` → ``desc.ip_or_host_name`` + in ``transfer_engine_impl.cpp``) is routable cross-node. Link-local + (169.254/16, fe80::/10) is rejected — ``gethostbyname`` can resolve + to APIPA on hosts where ``avahi-autoipd`` is active. + """ + try: + ip = socket.gethostbyname(socket.gethostname()) + if ipaddress.ip_address(ip).is_link_local: + return "" + return ip + except Exception: + return "" + + +def _mooncake_transport_config() -> dict: + protocol = os.environ.get("MC_MOONCAKE_PROTOCOL", "tcp") + if protocol != "rdma": + return {"protocol": "tcp"} + device = os.environ.get("MC_MOONCAKE_DEVICE", "") + if not device: + try: + out = subprocess.run( + [ + "sh", + "-c", + "for d in /sys/class/infiniband/mlx5_*/ports/1/link_layer; do " + " test -f $d && grep -q Ethernet $d && basename $(dirname $(dirname $d)); " + "done | head -1", + ], + check=False, + capture_output=True, + text=True, + ).stdout.strip() + device = out or "" + except Exception: + device = "" + if device: + os.environ.setdefault("MC_GID_INDEX", os.environ.get("MC_GID_INDEX", "3")) + return {"protocol": "rdma", "device_name": device} + + +def _connect_existing() -> None: + """Worker-process path: connect this process's client to the Ray cluster. + + Connects to the already-running named controller actor. Mirrors + rl-arena/arena/dataplane_client.py's `tq.init()` (no args) call. + """ + _tq().init() + + +_TQ_RUNTIME_ENV_PATCHED = False + + +def _patch_tq_actor_runtime_env() -> None: + """Inject ``{"pip": ["TransferQueue==0.1.6"]}`` into TQ's actor ``.options()``. + + TQ spawns ``SimpleStorageUnit`` and ``TransferQueueController`` via + ``Cls.options(...).remote(...)`` without a runtime_env, so they + inherit the job-level env. In a multi-node container deployment + where each node has its own ``/opt/nemo_rl_venv``, the driver's + ``uv sync`` only updates ray-head's venv and a worker-node actor + fails with ``ModuleNotFoundError``. This monkey-patch makes Ray + pip-install TQ into a per-actor runtime_env on first spawn (cached + per-node by Ray afterwards). Idempotent. Couples us to TQ's internal + class layout — if TQ restructures, this becomes a no-op with a + logged warning and we fall back to per-node ``uv sync``. 
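+
+    Effectively, TQ's internal ``Cls.options(...).remote(...)`` spawn then
+    behaves as if it had been written with an explicit runtime_env
+    (illustrative)::
+
+        SimpleStorageUnit.options(
+            runtime_env={"pip": ["TransferQueue==0.1.6"]},
+        ).remote(...)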
+ """ + global _TQ_RUNTIME_ENV_PATCHED + if _TQ_RUNTIME_ENV_PATCHED: + return + + runtime_env = {"pip": ["TransferQueue==0.1.6"]} + + def _install(cls) -> bool: + if not hasattr(cls, "options"): + return False + original = cls.options + + def patched(*args, **kwargs): + kwargs.setdefault("runtime_env", runtime_env) + return original(*args, **kwargs) + + cls.options = patched # type: ignore[method-assign] + return True + + patched_any = False + try: + from transfer_queue.storage.simple_backend import SimpleStorageUnit + + patched_any |= _install(SimpleStorageUnit) + except ImportError: + pass + try: + from transfer_queue.controller import TransferQueueController + + patched_any |= _install(TransferQueueController) + except ImportError: + pass + + if not patched_any: + # Soft-fail: TQ may have moved its actor classes. The driver will + # still work; multi-node TQ may need the per-node `uv sync` workaround. + import warnings + + warnings.warn( + "Could not patch TQ actor classes for runtime_env injection. " + "Multi-node TQ may fail with ModuleNotFoundError: 'transfer_queue' " + "on worker nodes. Workaround: run `uv sync` inside each node's " + "container before the driver runs.", + RuntimeWarning, + stacklevel=2, + ) + _TQ_RUNTIME_ENV_PATCHED = True + + +def _init_tq(cfg: DataPlaneConfig) -> None: + """Driver-process path: bootstrap the TQ controller for the chosen backend.""" + from omegaconf import OmegaConf + + tq = _tq() + base = OmegaConf.load(str(resources.files("transfer_queue") / "config.yaml")) + + backend = cfg.get("backend", "simple") + storage_capacity = cfg.get("storage_capacity", 1_000_000) + num_storage_units = cfg.get("num_storage_units", 2) + + # polling_mode=True: controller returns empty BatchMeta instead of raising + # TimeoutError when no samples are ready yet. The client-side blocking + # loop in `claim_meta` drives the retry cadence. + controller_overlay = {"controller": {"polling_mode": True}} + + if backend == "simple": + overlay = { + **controller_overlay, + "backend": { + "storage_backend": "SimpleStorage", + "SimpleStorage": { + "total_storage_size": storage_capacity, + "num_data_storage_units": num_storage_units, + }, + }, + } + elif backend == "mooncake_cpu": + # The mooncake-transfer-engine wheel ships `mooncake_master` at + # /mooncake/, NOT on $PATH. TQ's + # subprocess.Popen(["mooncake_master", ...]) fails with + # FileNotFoundError unless we put the package dir on PATH first. + import mooncake # type: ignore[import-not-found] + + # TQ's mooncake_client masks any underlying ImportError as + # "Please install via pip install mooncake-transfer-engine". + # Force the real cause (e.g. ``libcudart.so.X: cannot open + # shared object file``) to surface by importing here. + import mooncake.store # type: ignore[import-not-found] # noqa: F401 + + _moon_pkg = os.path.dirname(mooncake.__file__) + _master = os.path.join(_moon_pkg, "mooncake_master") + try: + os.chmod(_master, 0o755) + except OSError as e: + if not os.access(_master, os.X_OK): + raise RuntimeError( + f"Failed to make {_master} executable: {e}. " + f"Mooncake bootstrap requires this binary." + ) from e + _existing_path = os.environ.get("PATH", "") + if _moon_pkg not in _existing_path.split(os.pathsep): + os.environ["PATH"] = _moon_pkg + os.pathsep + _existing_path + # Per-process MC_TCP_BIND_ADDRESS / KV-path promotion already + # set by TQDataPlaneClient.__init__ (runs on every process, + # including this driver). _init_tq only needs local_ip below + # for the metadata/master server URLs (driver-bound). 
+ local_ip = _get_local_node_ip() + if not local_ip: + raise RuntimeError( + "Mooncake backend requires a local node IP; " + "_get_local_node_ip() returned empty." + ) + # Mooncake virtual segment / local buffer sizing. Defaults sized + # for production-scale rollouts (multi-iter DAPO, large + # message_log object payloads); under-sized values cause + # ``batch_get_tensor returned None`` once mooncake exhausts its + # internal allocator headroom. Lazy-mmap'd, so RSS is bounded + # by actual traffic. Override per-recipe via + # ``data_plane.global_segment_size`` / + # ``data_plane.local_buffer_size`` (bytes). + overlay = { + **controller_overlay, + "backend": { + "storage_backend": "MooncakeStore", + "MooncakeStore": { + # pyrefly: ignore # no-matching-overload + "global_segment_size": int( + cfg.get("global_segment_size", 512 * 1024**3) + ), + # pyrefly: ignore # no-matching-overload + "local_buffer_size": int( + cfg.get("local_buffer_size", 64 * 1024**3) + ), + # _init_tq runs on the driver only — driver IS the + # head, so local_ip here is also the head's IP that + # mooncake_master + the metadata server bind to. + "metadata_server": f"{local_ip}:50050", + "master_server_address": f"{local_ip}:50051", + **_mooncake_transport_config(), + }, + }, + } + else: + raise ValueError(f"unknown TQ backend: {backend!r}") + + conf = OmegaConf.merge(base, overlay) + + # Inject runtime_env into TQ's actor spawn so SimpleStorageUnit / + # TransferQueueController land on workers with transfer_queue available + # — see _patch_tq_actor_runtime_env() docstring for the why. + _patch_tq_actor_runtime_env() + + # pyrefly: ignore # bad-argument-type + tq.init(conf=conf) + + +# ────────────────────────────────────────────────────────────────────────── +# Adapter-level enforcement that nothing but tensors crosses the bus. +# ────────────────────────────────────────────────────────────────────────── + + +def _promote_1d_leaves(td: TensorDict) -> TensorDict: + """Unsqueeze 1D tensor leaves to ``(N, 1)`` — mooncake_cpu KV-path workaround. + + Works around TQ's ``KVStorageManager`` 1D schema/data mismatch; + :func:`_from_wire` squeezes the trailing 1 back on read. Symmetric + with `_from_wire` — callers gate on ``self._promote_1d``. + ``NonTensorStack`` / ``NonTensorData`` leaves pass through. + + Args: + td: ``TensorDict`` whose 1D tensor leaves should be promoted. + + Returns: + ``TensorDict`` with 1D tensor leaves unsqueezed to ``(N, 1)``; + all other leaves pass through unchanged. 
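+
+    Example (illustrative)::
+
+        td = TensorDict({"rewards": torch.ones(4)}, batch_size=[4])
+        wire = _promote_1d_leaves(td)   # rewards: (4,) -> (4, 1)
+        back = _from_wire(wire)         # rewards: (4, 1) -> (4,)
+        assert back["rewards"].shape == (4,)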
+ """ + new_dict: dict[str, torch.Tensor] = {} + changed = False + for k in td.keys(include_nested=True, leaves_only=True): + v = td.get(k) + if isinstance(v, torch.Tensor) and not v.is_nested and v.dim() == 1: + new_dict[str(k)] = v.unsqueeze(-1).contiguous() + changed = True + else: + # pyrefly: ignore # bad-argument-type + new_dict[str(k)] = v + if not changed: + return td + return TensorDict(new_dict, batch_size=td.batch_size) + + +def _from_wire(td: TensorDict) -> TensorDict: + """Inverse of `_promote_1d_leaves`: squeeze trailing 1 back to (N,).""" + new_dict: dict[str, torch.Tensor] = {} + changed = False + for k in td.keys(include_nested=True, leaves_only=True): + v = td.get(k) + if ( + isinstance(v, torch.Tensor) + and not v.is_nested + and v.dim() >= 2 + and v.shape[-1] == 1 + ): + new_dict[str(k)] = v.squeeze(-1).contiguous() + changed = True + else: + # pyrefly: ignore # bad-argument-type + new_dict[str(k)] = v + if not changed: + return td + return TensorDict(new_dict, batch_size=td.batch_size) + + +# ────────────────────────────────────────────────────────────────────────── +# Per-partition record kept client-side for register_partition semantics +# (TQ creates partitions implicitly on first put — this is bookkeeping +# that lets `kv_clear(keys=None)` and the consumer-task list survive +# without a controller round-trip). +# ────────────────────────────────────────────────────────────────────────── + + +@dataclass +class _PartitionRecord: + fields: list[str] + num_samples: int + consumer_tasks: list[str] + grpo_group_size: int | None + enums: dict[str, list[str]] + seen_keys: set[str] = field(default_factory=set) + + +class TQDataPlaneClient(DataPlaneClient): + """Adapter faƧade — maps NeMo-RL calls onto TransferQueue's public API.""" + + def __init__(self, cfg: DataPlaneConfig, *, bootstrap: bool = True) -> None: + """Construct a TQ-backed client. + + Args: + cfg: data-plane config (backend selection, poll cadence, …). + bootstrap: True (driver) bootstraps the TQ controller using + ``cfg``. False (worker) connects this process to an + already-running named controller actor in the Ray + cluster — ``cfg`` is then only consulted for client-side + knobs (poll interval). + """ + # mooncake_cpu setup must run BEFORE _init_tq / _connect_existing + # — once tq.init/connect runs, Mooncake's engine.so reads the + # env vars and they can't be changed. Three per-process knobs + # needed in EVERY process that builds a TQ client (driver, + # SyncRolloutActor, every MegatronPolicyWorker rank): + # 1. MC_TCP_BIND_ADDRESS — Mooncake engine.so writes this into + # desc.ip_or_host_name, the address peers receive from the + # metadata service. Without it, getifaddrs()[0] picks usb0 + # (169.254.x APIPA) and peers fail to connect. + # 2. MC_STORE_MEMCPY=0 — Mooncake LOCAL_MEMCPY fast-path + # reinterpret_casts cross-process pointers, segfaulting + # MemcpyWorkerPool. PR #1995 (merged 2026-04-30) fixes the + # root cause but isn't in any published wheel yet + # (mooncake-transfer-engine 0.3.10.post2 was bumped before + # that merge). Drop this once the wheel includes the fix. + # 3. KV-path 1D promotion — works around TQ's + # extract_field_schema schema/data mismatch for 1D fields. + if cfg.get("backend") == "mooncake_cpu": + local_ip = _get_local_node_ip() + if local_ip: + # Force-assign per-process: Ray actors inherit env vars + # from the driver, so a setdefault on the worker would + # be a no-op and the actor would announce the driver's + # IP — peers fail with "connection refused". 
+ os.environ["MC_TCP_BIND_ADDRESS"] = local_ip + os.environ.setdefault("MC_STORE_MEMCPY", "0") + + # Workaround for TQ KVStorageManager's 1D-field schema/data + # mismatch (only `mooncake_cpu` goes through that path; `simple` + # is unaffected). Writer unsqueezes 1D → (N, 1) on put; reader + # squeezes the trailing 1 back on get. Drop when upstream TQ + # unifies the schema/data shapes for 1D fields. + self._promote_1d = cfg["backend"] == "mooncake_cpu" + + if bootstrap: + _init_tq(cfg) + else: + _connect_existing() + # `self._tq` is the transfer_queue module: KV ops (`kv_batch_*`, + # `kv_clear`) are module-level helpers; metadata ops (`claim_meta`, + # `check_consumption_status`) go through `self._tq.get_client()`. + self._tq = _tq() + self._poll_interval_s = cfg.get("claim_meta_poll_interval_s", 0.5) + self._partitions: dict[str, _PartitionRecord] = {} + self._closed = False + + # ── (A) task-mediated ─────────────────────────────────────────────── + + def register_partition( + self, + partition_id: str, + fields: list[str], + num_samples: int, + consumer_tasks: list[str], + grpo_group_size: int | None = None, + enums: dict[str, list[str]] | None = None, + ) -> None: + # Client-side bookkeeping. TQ creates partitions implicitly on + # first kv_batch_put; pre-registration is for our own validation + # and the kv_clear(keys=None) recovery path. + self._partitions[partition_id] = _PartitionRecord( + fields=list(fields), + num_samples=int(num_samples), + consumer_tasks=list(consumer_tasks), + grpo_group_size=grpo_group_size, + enums=dict(enums) if enums else {}, + ) + + def claim_meta( + self, + partition_id: str, + task_name: str, + required_fields: list[str], + batch_size: int, + dp_rank: int | None = None, + blocking: bool = True, + timeout_s: float = 60.0, + ) -> KVBatchMeta: + client = self._tq.get_client() + deadline = time.time() + max(0.0, timeout_s) + sampling_config: dict[str, Any] = {} + if dp_rank is not None: + sampling_config["dp_rank"] = dp_rank + + while True: + tq_meta = client.get_meta( + data_fields=list(required_fields), + batch_size=int(batch_size), + partition_id=partition_id, + task_name=task_name, + mode="fetch", + sampling_config=sampling_config, + ) + if getattr(tq_meta, "size", 0) > 0: + break + if not blocking: + return KVBatchMeta( + partition_id=partition_id, + task_name=task_name, + keys=[], + fields=list(required_fields), + ) + if time.time() >= deadline: + raise TimeoutError( + f"claim_meta(partition={partition_id}, task={task_name}) " + f"timed out after {timeout_s}s" + ) + time.sleep(self._poll_interval_s) + + keys: list[str] = client.kv_retrieve_keys( + global_indexes=list(tq_meta.global_indexes), + partition_id=partition_id, + ) + + # Lift sequence lengths from the rollout-side `input_lengths` tag + # if present. Driver-side balancing (shard_meta_for_dp) needs + # this; the task-mediated path does not. 
+ tags = tq_meta.custom_meta or [{} for _ in keys] + seqlens: list[int] | None = None + if tags and any("input_lengths" in t for t in tags): + seqlens = [int(t.get("input_lengths", 0)) for t in tags] + + return KVBatchMeta( + partition_id=partition_id, + task_name=task_name, + keys=keys, + fields=list(required_fields), + sequence_lengths=seqlens, + ) + + def get_data( + self, + meta: KVBatchMeta, + select_fields: list[str] | None = None, + ) -> TensorDict: + fields = select_fields if select_fields is not None else meta.fields + if fields is None: + raise ValueError( + "get_data requires either select_fields or meta.fields; " + "silently fetching all fields is forbidden." + ) + return self.kv_batch_get(meta.keys, meta.partition_id, list(fields)) + + def check_consumption_status( + self, partition_id: str, task_names: list[str] + ) -> bool: + client = self._tq.get_client() + for t in task_names: + try: + ok = client.check_consumption_status( + task_name=t, partition_id=partition_id + ) + except Exception: + return False + if not ok: + return False + return True + + # ── (B) direct-by-key ────────────────────────────────────────────── + + def kv_batch_put( + self, + keys: list[str], + partition_id: str, + fields: TensorDict | None = None, + tags: list[dict[str, Any]] | None = None, + ) -> KVBatchMeta: + if not keys: + return KVBatchMeta( + partition_id=partition_id, task_name=None, keys=[], fields=None + ) + if tags is None: + tags = [{} for _ in keys] + + wire_fields: TensorDict | None = None + field_names: list[str] | None = None + if fields is not None: + # No ``.contiguous()``: under tensordict==0.12.2 it strips + # non-tensor leaves (NonTensorStack stored as LinkedList) to empty + # TDs. TQ's encoder forces ``.contiguous()`` per tensor leaf + # itself, so the call here was redundant for tensors and + # destructive for non-tensors. 
+ wire_fields = fields.detach() # type: ignore[bad-assignment,missing-argument] + if self._promote_1d: + wire_fields = _promote_1d_leaves(wire_fields) # type: ignore[bad-argument-type] + field_names = list(wire_fields.keys()) + + self._tq.kv_batch_put( + keys=list(keys), + partition_id=partition_id, + fields=wire_fields, + tags=tags, + ) + + rec = self._partitions.get(partition_id) + if rec is not None: + rec.seen_keys.update(keys) + + return KVBatchMeta( + partition_id=partition_id, + task_name=None, + keys=list(keys), + fields=field_names, + ) + + def kv_batch_get( + self, + keys: list[str], + partition_id: str, + select_fields: list[str], + ) -> TensorDict: + if not keys: + return TensorDict({}, batch_size=(0,)) + td = self._tq.kv_batch_get( + keys=list(keys), + partition_id=partition_id, + select_fields=select_fields, + ) + if self._promote_1d: + td = _from_wire(td) + return td + + def kv_clear(self, keys: list[str] | None, partition_id: str) -> None: + if keys is None: + rec = self._partitions.pop(partition_id, None) + keys = list(rec.seen_keys) if rec is not None else [] + if not keys: + try: + listing = self._tq.kv_list(partition_id=partition_id) + keys = list(listing.get(partition_id, {}).keys()) + except Exception: + keys = [] + else: + self._partitions.pop(partition_id, None) + if keys: + self._tq.kv_clear(keys=list(keys), partition_id=partition_id) + + # ── (C) lifecycle ────────────────────────────────────────────────── + + def close(self) -> None: + if self._closed: + return + self._closed = True + try: + self._tq.close() + except Exception: + pass diff --git a/nemo_rl/data_plane/codec.py b/nemo_rl/data_plane/codec.py new file mode 100644 index 0000000000..e35ea19097 --- /dev/null +++ b/nemo_rl/data_plane/codec.py @@ -0,0 +1,363 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Wire <-> trainer codec — jagged-on-the-wire bridge. + +* Writer side: variable-length fields are encoded as +``torch.nested.nested_tensor`` with ``layout=torch.jagged`` before +``kv_batch_put``. Padding tax is paid only when a consumer needs a +rectangular tensor. + +* Reader side: :func:`materialize` accepts the wire TensorDict and, +when ``layout='padded'``, calls +:func:`torch.nested.to_padded_tensor` on any nested leaves using +the per-field padding value supplied in ``pad_value_dict``. Trainer +code consumes the padded BatchedDataDict unchanged. + +* Worker write-backs that produce ``response``-shaped outputs use +:func:`response_from_nested` to extract the response slice from a +(prompt+response) nested tensor. + +* Non-tensor object fields ride as ``NonTensorStack`` / ``NonTensorData`` +leaves (TQ-native passthrough). :func:`materialize` decodes them back +to ``np.ndarray(dtype=object)`` for the trainer. 
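+
+Illustrative round trip (shapes only, not tied to a real rollout)::
+
+    lengths = torch.tensor([3, 5])
+    padded = torch.zeros(2, 5, dtype=torch.long)       # rectangular (N, S)
+    wire = TensorDict(
+        {"input_ids": to_nested_by_length(padded, lengths)},  # jagged on the wire
+        batch_size=[2],
+    )
+    batch = materialize(wire, pad_value_dict={"input_ids": 0})
+    assert batch["input_ids"].shape == (2, 5)           # padded back for the trainer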
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import numpy as np +import torch +from tensordict import TensorDict, TensorDictBase + +from nemo_rl.data_plane.schema import Layout + +if TYPE_CHECKING: + # Type-only import. At runtime, BatchedDataDict is loaded lazily + # inside materialize() — see comment there for rationale. + from nemo_rl.distributed.batched_data_dict import BatchedDataDict + + +# ── Padded ↔ nested helpers ─────────────────────────────────────────── + + +def to_nested_by_length( + padded: torch.Tensor, + lengths: torch.Tensor, +) -> torch.Tensor: + """Strip right-padding off a rectangular tensor using per-row lengths. + + Used by the producer side: convert + :func:`batched_message_log_to_flat_message` output (already padded) + into the wire format before ``kv_batch_put``. + + Args: + padded: Rectangular tensor of shape ``(N, S, ...)``. + lengths: Per-row valid lengths, shape ``(N,)``. CUDA tensors are + moved to CPU once to avoid per-row syncs. + + Returns: + A ``torch.jagged`` nested tensor whose i-th row is + ``padded[i, :lengths[i], ...]``. + """ + if padded.dim() < 2: + raise ValueError( + f"to_nested_by_length expects (N, S, ...); got shape {tuple(padded.shape)}" + ) + n = padded.shape[0] + if lengths.shape != (n,): + raise ValueError( + f"lengths shape {tuple(lengths.shape)} != ({n},) (rows of padded)" + ) + # Single sync — without this, the per-row ``.item()`` below would + # GPU-sync N times if ``lengths`` lives on CUDA. + lens = lengths.cpu().tolist() if lengths.is_cuda else lengths.tolist() + rows = [padded[i, : lens[i]] for i in range(n)] + return torch.nested.as_nested_tensor(rows, layout=torch.jagged) + + +def stack_or_nest(tensors: list[torch.Tensor]) -> torch.Tensor: + """Stack equal-shape rows; reconstruct as jagged nested when ragged. + + Args: + tensors: Per-row tensors; assumed to share leading dims modulo + an optional ragged seq dim. Empty list returns ``torch.empty(0)``. + + Returns: + A regular tensor when all rows share shape; otherwise a + ``torch.jagged`` nested tensor. + """ + if not tensors: + return torch.empty(0) + first_shape = tensors[0].shape + if all(t.shape == first_shape for t in tensors): + return torch.stack(tensors, dim=0) + return torch.nested.as_nested_tensor(tensors, layout=torch.jagged) + + +def unwrap_wire_stripped_payload(item: Any) -> Any: + """Recover the payload of a possibly wire-stripped ``NonTensorData``. + + TQ's ``MsgpackEncoder._encode_tensordict`` serializes any + ``TensorDictBase`` via ``dict(obj.items())`` — only the tensor + backing dict. ``NonTensorData`` stores its payload in + ``_non_tensordict["data"]``, so it round-trips through ZMQ as an + empty ``TensorDict({}, batch_size=[])``. We map only that exact + signature to ``None``; any other ``TensorDictBase`` (with tensor + fields, non-scalar batch, or a salvageable ``_non_tensordict`` + payload) passes through unchanged so we never drop real data. + """ + nt = getattr(item, "_non_tensordict", None) + if isinstance(nt, dict) and "data" in nt: + return nt["data"] + if ( + isinstance(item, TensorDictBase) + and item.batch_dims == 0 + and len(item.keys()) == 0 + ): + return None + return item + + +def maybe_pack_jagged( + val: torch.Tensor, + lengths: torch.Tensor, +) -> torch.Tensor: + """Convert ``val`` to jagged iff it looks like a per-token field. 
+ + Used by every write site (initial put, driver delta-write, worker + write-back) so all per-token fields land in TQ as jagged with the + same row lengths — read-time materialization then pads them all to + the same target shape, avoiding shape-mismatch crashes between + mixed wire formats. + + Args: + val: Tensor to consider. Qualifies for jagged conversion only + when ``val.shape == (N, max(lengths), ...)`` where + ``N == lengths.shape[0]``. + lengths: Per-row valid lengths, shape ``(N,)``. + + Returns: + A ``torch.jagged`` nested tensor when the shape heuristic matches; + otherwise ``val`` passed through as a rectangular tensor. + """ + n = lengths.shape[0] + if n == 0: + return val.detach().contiguous() + max_len = int(lengths.max().item()) + if val.dim() < 2 or val.shape[0] != n or val.shape[1] != max_len: + return val.detach().contiguous() + return to_nested_by_length(val.detach(), lengths) + + +def pack_jagged_fields( + fields: "dict[str, torch.Tensor | np.ndarray]", + *, + lengths: torch.Tensor | None, +) -> TensorDict: + """Pack a column dict into the wire layout expected by ``kv_batch_put``. + + Zero-copy where possible: per-token tensors that match + ``(N, max(lengths), ...)`` become ``torch.jagged`` views via + :func:`maybe_pack_jagged`; non-conforming tensors pass through + rectangular; ``np.ndarray(dtype=object)`` is forwarded as-is. This + is a **layout transform**, not serialization — the on-wire bytes are + produced later by the TQ backend's msgpack encoder. Centralizing + the transform here makes it the single source of truth for both + :func:`kv_first_write` and :func:`write_columns`. + + Args: + fields: Column name → tensor or object array. Other value types + raise ``TypeError``. + lengths: Per-row valid lengths used by :func:`maybe_pack_jagged` + to decide whether a tensor qualifies for jagged conversion. + ``None`` disables jagged conversion entirely (every tensor + passes through rectangular). + + Returns: + ``TensorDict`` with ``batch_size=[N]`` (N from ``lengths`` if + given, else 0) ready for ``kv_batch_put``. + """ + n = int(lengths.shape[0]) if lengths is not None else 0 + packed: dict[str, Any] = {} + for k, v in fields.items(): + if isinstance(v, np.ndarray) and v.dtype == object: + # tensordict==0.12.2 wire bug: a NonTensorStack stored as a + # TensorDict leaf returns as a LinkedList on parent + # __getitem__, losing identity. ndarray(dtype=object) + # round-trips intact. + packed[k] = v + elif isinstance(v, torch.Tensor): + packed[k] = ( + maybe_pack_jagged(v, lengths) + if lengths is not None + else v.detach().contiguous() + ) + else: + raise TypeError( + f"pack_jagged_fields: unsupported value type for {k!r}: {type(v)}. " + "Use torch.Tensor or np.ndarray(dtype=object)." + ) + return TensorDict(packed, batch_size=[n]) + + +def pack_per_token_field(val: torch.Tensor, lengths: torch.Tensor) -> torch.Tensor: + """Force-jaggedize a known per-token field, tolerating SP padding. + + Unlike :func:`maybe_pack_jagged` (which is shape-strict to avoid + false positives on 3D extras like image features), this function is + invoked at write-back sites where the caller already knows the + field is per-token (e.g. ``prev_logprobs``, + ``reference_policy_logprobs``). mcore SP rounds the forward + output's seq dim up to a multiple of TP, so the value can be 1+ + tokens wider than ``max(lengths)``; :func:`to_nested_by_length` + slices each row to its own length and drops the trailing SP + padding cleanly. + + Args: + val: Per-token tensor. 
Falls back to rectangular when it cannot + be jaggedized (wrong batch dim, < 2D, or seq dim shorter + than ``max(lengths)``). + lengths: Per-row valid lengths, shape ``(N,)``. + + Returns: + A ``torch.jagged`` nested tensor when the shape allows; + otherwise ``val`` passed through as a rectangular tensor. + """ + n = lengths.shape[0] + if n == 0: + return val.detach().contiguous() + max_len = int(lengths.max().item()) + if val.dim() < 2 or val.shape[0] != n or val.shape[1] < max_len: + return val.detach().contiguous() + return to_nested_by_length(val.detach(), lengths) + + +def response_from_nested( + full: torch.Tensor, + response_mask: torch.Tensor, +) -> torch.Tensor: + """Extract the response slice from a (prompt+response) nested tensor. + + Used on the worker side for logprob / ref-logprob write-back where + only the response-token slice is interesting downstream. The + "left-shift by one token" convention is applied (so logprobs at + output position i correspond to the prediction of input token i+1). + + Args: + full: Jagged nested tensor of shape + ``(N, prompt_len + response_len)``. + response_mask: Jagged nested tensor of shape + ``(N, response_len)``; its ``offsets().diff()`` gives the + per-row response length. + + Returns: + Jagged nested tensor of shape ``(N, response_len)`` containing + the left-shifted response slice. + """ + values = full.values() + offsets = full.offsets() + response_lens = response_mask.offsets().diff() + response_list = [] + for resp_len, seq_offset in zip(response_lens, offsets[1:], strict=True): + # left-shift output by one token for log_probs / values + response_list.append(values[seq_offset - resp_len - 1 : seq_offset - 1]) + return torch.nested.as_nested_tensor(response_list, layout=torch.jagged) + + +# ── materialize: wire TensorDict → trainer BatchedDataDict ──────────── + + +def materialize( + td: TensorDict, + layout: Layout = "padded", + pad_value_dict: dict[str, int | float] | None = None, + pad_to_multiple: int = 1, +) -> "BatchedDataDict[Any]": + """Convert a wire TensorDict to a BatchedDataDict. + + Trainer/worker code expects rectangular tensors — this is the + bridge from the on-wire nested format. + + The lazy ``BatchedDataDict`` import keeps + ``import nemo_rl.data_plane`` cheap for unit tests that don't + actually call this function (``BatchedDataDict`` transitively + pulls multimodal deps like decord / torchvision). + + Args: + td: Wire TensorDict to materialize. + layout: ``"padded"`` (default) pads nested-tensor leaves via + :func:`torch.nested.to_padded_tensor` using + ``pad_value_dict[k]`` (or 0 if unspecified); rectangular + leaves pass through. ``"jagged"`` passes nested leaves + through — use only when the caller knows how to consume + them. + pad_value_dict: Per-field pad value used when ``layout='padded'``. + pad_to_multiple: Round the seq dim up to the next multiple after + ``to_padded_tensor``. Required when downstream backends + impose alignment (mcore SP needs ``seq_len % TP == 0``; + PyTorch CP needs ``seq_len % (CP * 2) == 0``). Default 1 + disables extra alignment. + + Returns: + ``BatchedDataDict`` with rectangular tensors for padded layout, + nested tensors for jagged layout, and ``np.ndarray(dtype=object)`` + for ``NonTensorStack`` leaves (TQ-native non-tensor passthrough). 
+ """ + from tensordict import NonTensorData, NonTensorStack + + from nemo_rl.distributed.batched_data_dict import BatchedDataDict + + if pad_to_multiple < 1: + raise ValueError(f"pad_to_multiple must be >= 1, got {pad_to_multiple}") + pads = pad_value_dict or {} + out: dict[str, Any] = {} + # pyrefly: inference cycle on tensordict.items() loop var. + for key, val in td.items(include_nested=False): # type: ignore[bad-assignment] + if isinstance(val, NonTensorStack): + # ``np.asarray(list, dtype=object)`` would probe each item's + # ``__iter__`` to detect a nested array. A wire-stripped TD + # has ``batch_dims=0`` → its ``__iter__`` raises + # ``StopIteration`` → ``RuntimeError: generator raised + # StopIteration``. ``np.empty + assignment`` skips that + # probe; ``unwrap_wire_stripped_payload`` normalizes both + # live ``NonTensorData`` and stripped TDs. + items = val.tolist() + arr = np.empty(len(items), dtype=object) + for i, item in enumerate(items): + arr[i] = unwrap_wire_stripped_payload(item) + out[key] = arr + continue + if isinstance(val, NonTensorData): + out[key] = np.asarray([val.data], dtype=object) + continue + if not isinstance(val, torch.Tensor): + raise TypeError( + f"materialize() received unexpected leaf type for {key!r}: " + f"{type(val)}. Expected Tensor or NonTensorStack." + ) + if val.is_nested and layout == "padded": + pad = pads.get(key, 0) + padded = torch.nested.to_padded_tensor(val, padding=pad) + if pad_to_multiple > 1 and padded.dim() >= 2: + seq_dim = padded.shape[1] + rem = seq_dim % pad_to_multiple + if rem != 0: + extra = pad_to_multiple - rem + pad_spec = [0, 0] * (padded.dim() - 2) + [0, extra] + padded = torch.nn.functional.pad(padded, pad_spec, value=pad) + out[key] = padded + else: + out[key] = val + return BatchedDataDict(out) diff --git a/nemo_rl/data_plane/column_io.py b/nemo_rl/data_plane/column_io.py new file mode 100644 index 0000000000..63c0a2ed2c --- /dev/null +++ b/nemo_rl/data_plane/column_io.py @@ -0,0 +1,181 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Column-level helpers above :class:`DataPlaneClient`. + +These are thin wrappers around :meth:`kv_batch_get` / :meth:`kv_batch_put` +that operate on **columns** (named fields) of a partition — not on the +driver process specifically. The driver uses them to fetch a slice and +materialize / write deltas back; worker-side dispatches use the +equivalents on ``AbstractPolicyWorker`` (``self._fetch(meta)`` / +``self._write_back``). + + * :func:`read_columns` — ``kv_batch_get + materialize`` (decode jagged + + object-array fields into a :class:`BatchedDataDict`). + * :func:`write_columns` — pack-to-wire + ``kv_batch_put`` for deltas + against an existing :class:`KVBatchMeta`. + * :func:`kv_first_write` — pack-to-wire + ``kv_batch_put`` for the + rollout-actor's first put of a partition. Returns a new + :class:`KVBatchMeta`. 
+""" + +from typing import Any, Sequence + +import numpy as np +import torch + +from nemo_rl.data.llm_message_utils import attach_message_log_view +from nemo_rl.data_plane.codec import materialize, pack_jagged_fields +from nemo_rl.data_plane.interfaces import DataPlaneClient, KVBatchMeta +from nemo_rl.data_plane.schema import Layout +from nemo_rl.distributed.batched_data_dict import BatchedDataDict + + +def read_columns( + dp_client: DataPlaneClient, + meta: KVBatchMeta, + select_fields: Sequence[str], + *, + layout: Layout = "padded", + pad_value_dict: dict[str, Any] | None = None, +) -> BatchedDataDict[Any]: + """``kv_batch_get(meta.keys, select_fields=...) → materialize``. + + ``pad_to_multiple`` is read from ``meta.extra_info`` so the + materialized seq dim matches the alignment downstream backends + require (mcore SP / PyTorch CP). Non-tensor object fields ride as + ``NonTensorStack`` leaves; :func:`materialize` unwraps them to + ``np.ndarray(dtype=object)``. + + Args: + dp_client: Data-plane client used for the underlying fetch. + meta: ``KVBatchMeta`` describing the keys to fetch. + select_fields: Fields to fetch. + layout: Materialization layout (``"padded"`` or ``"jagged"``). + pad_value_dict: Per-field pad value for jagged tensors (e.g. + ``input_ids → pad_token_id``); defaults to 0. + + Returns: + ``BatchedDataDict`` with the requested fields, materialized. + """ + td = dp_client.kv_batch_get( + keys=meta.keys, + partition_id=meta.partition_id, + select_fields=list(select_fields), + ) + pad_mult = int((meta.extra_info or {}).get("pad_to_multiple", 1)) + data = materialize( + td, + layout=layout, + pad_value_dict=pad_value_dict, + pad_to_multiple=pad_mult, + ) + attach_message_log_view(data) + return data + + +def write_columns( + dp_client: DataPlaneClient, + meta: KVBatchMeta, + fields: "dict[str, torch.Tensor | np.ndarray]", +) -> None: + """``kv_batch_put(meta.keys, fields=...)``. + + Per-token tensor fields are converted to jagged via + :func:`pack_jagged_fields` so they land in TQ with the same row + lengths as the initial put. ``np.ndarray(dtype=object)`` leaves + pass through as-is. + + Args: + dp_client: Data-plane client used for the underlying put. + meta: ``KVBatchMeta`` describing the keys being written. + fields: Map of field name to tensor or object array. + """ + if not fields: + return + + seq_lens = meta.sequence_lengths + lengths = torch.tensor(seq_lens, dtype=torch.long) if seq_lens is not None else None + td = pack_jagged_fields(fields, lengths=lengths) + dp_client.kv_batch_put( + keys=meta.keys, + partition_id=meta.partition_id, + fields=td, + ) + + +def kv_first_write( + final_batch_cpu: BatchedDataDict[Any], + *, + keys: Sequence[str], + dp_client: DataPlaneClient, + partition_id: str, + extra_info: dict[str, Any] | None = None, + task_name: str = "train", + pad_to_multiple: int = 1, +) -> KVBatchMeta: + """Single flat ``kv_batch_put`` of every tensor field in ``final_batch_cpu``. + + The rollout actor's first put of a partition. Caller mints + ``keys`` (verl-style) — the helper is rollout-shape-agnostic. + + Args: + final_batch_cpu: Rollout output already on CPU. Must contain + ``"sample_mask"`` (used as batch-size oracle: ``shape[0] == N``) + and ``"input_lengths"`` (per-row valid lengths for the jagged + pack). Tensor fields are packed jagged via + :func:`pack_jagged_fields`; ``np.ndarray(dtype=object)`` + leaves pass through. + keys: Pre-minted per-sample keys, one per row of + ``final_batch_cpu``. + dp_client: Data-plane client used for the put. 
+ partition_id: TQ partition to write into. + extra_info: Optional extra fields to attach to the returned meta. + task_name: Consumer task tag stamped on the returned meta. + pad_to_multiple: Seq-dim alignment recorded in ``extra_info`` so + readers pad to a multiple compatible with downstream backends + (mcore SP, PyTorch CP). + + Returns: + ``KVBatchMeta`` covering the written keys. + """ + n = int(final_batch_cpu["sample_mask"].shape[0]) + if n == 0 or len(keys) != n: + raise ValueError( + f"kv_first_write: keys ({len(keys)}) must match batch size ({n})" + ) + lengths = final_batch_cpu["input_lengths"] + fields: dict[str, torch.Tensor | np.ndarray] = { + k: v + for k, v in final_batch_cpu.items() + if isinstance(v, torch.Tensor) + or (isinstance(v, np.ndarray) and v.dtype == object) + } + td = pack_jagged_fields(fields, lengths=lengths) + dp_client.kv_batch_put( + keys=list(keys), + partition_id=partition_id, + fields=td, + ) + + extras = dict(extra_info or {}) + if pad_to_multiple > 1: + extras["pad_to_multiple"] = int(pad_to_multiple) + return KVBatchMeta( + partition_id=partition_id, + task_name=task_name, + keys=list(keys), + fields=list(td.keys()), + sequence_lengths=[int(s) for s in lengths.tolist()], + extra_info=extras, + ) diff --git a/nemo_rl/data_plane/docs/data-plane-api-lifecycle.md b/nemo_rl/data_plane/docs/data-plane-api-lifecycle.md new file mode 100644 index 0000000000..0b803c5d4b --- /dev/null +++ b/nemo_rl/data_plane/docs/data-plane-api-lifecycle.md @@ -0,0 +1,341 @@ +# Data Plane API & GRPO Lifecycle + +Companion to `data_plane_integration_plan.md`. Captures the runtime view: +what calls TQ, in what order, with what payloads — and how this differs +from verl's TQ-on-PPO trainer. + +Audience: anyone touching `nemo_rl/algorithms/grpo_sync.py`, +`nemo_rl/data_plane/`, or `nemo_rl/algorithms/sync_utils.py`. + +--- + +## 1. The API surface + +Everything goes through `DataPlaneClient` (`nemo_rl/data_plane/interfaces.py`). +Eight methods, three groups. Call sites in `nemo_rl/algorithms`, +`nemo_rl/experience`, and `nemo_rl/models` always go through this client — +they never `import transfer_queue` directly. That's the swappable boundary. + +### Lifecycle + +- `register_partition(partition_id, fields, num_samples, consumer_tasks, ...)` + declares the partition schema and which consumer tasks will read from it +- `close()` releases controller / storage handles + +### Task-mediated (consumer-counter aware) + +- `get_meta(partition_id, task_name, required_fields, batch_size) → KVBatchMeta` + discovers samples ready for `task_name`; advances TQ's per-task counter +- `get_data(meta, select_fields) → TensorDict` resolves a meta to data +- `check_consumption_status(...)` — bool + +### Direct-by-key (the hot path in sync 1-hop) + +- `kv_batch_put(keys, partition_id, fields)` — producer entrypoint; + flips `production_status[sample, field] = 1` as a side effect +- `kv_batch_get(keys, partition_id, select_fields) → TensorDict` — direct fetch +- `kv_clear(keys, partition_id)` — drop + +### Helpers built on top (`nemo_rl/data_plane/`) + +- `kv_first_write(batch, uids, ...) 
→ KVBatchMeta` — single flat + `kv_batch_put` of all rollout fields +- `read_columns(client, meta, select)` — `kv_batch_get → materialize` +- `write_columns(client, meta, fields)` — typed `kv_batch_put` for deltas +- `shard_meta_for_dp(meta, dp_world)` — pure metadata split, no I/O, + no key remint +- `meta.subset(idxs)` / `meta.slice(start, stop)` / `meta.concat(other)` — pure metadata transforms (methods on `KVBatchMeta`) + (used by dynamic_sampling) + +--- + +## 2. Per-sample key invariant + +Mint **once** at rollout, reuse forever: + +``` + uid = "step17_prompt_42" # opaque, from driver dataset iter + key_i = f"{uid}_g{i}" # one per generation, i ∈ [0, n_gen) +``` + +Every `kv_batch_put` / `kv_batch_get` for that sample uses the same key. +Worker write-backs append columns; nothing remints. This is the same +invariant verl maintains (`{uid}_{session_id}_{i}`). + +--- + +## 3. E2E lifecycle for one GRPO step + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ DRIVER (grpo_sync.py) ─────────────────────────────┐ +│ │ +│ ā‘  register_partition(pid="step17", fields=[input_ids, ..., advantages, ...], │ +│ num_samples=N*G, consumer_tasks=["lp","ref","train"]) │ +│ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ spawns + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ SyncRolloutActor (Ray @remote) ───────────────────────────────────┐ +│ vllm.generate → flatten → mask → prompt extract │ +│ ā‘” kv_batch_put( keys=[uid_g0..uid_gN-1], │ +│ fields=TensorDict({input_ids, gen_logprobs, token_mask, ...})) │ +│ returns meta → driver │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”˜ + │ + ā”Œā”€ DRIVER ─────────────────────────────────────────────────┐ │ + │ ā‘¢ shard_meta_for_dp(meta, dp_world=8) → [mā‚€..m₇] ā”‚ā—„ā”€ā”€ā”€ā”˜ + │ (pure metadata, no I/O, no key remint) │ + ā””ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ Ray-call per DP rank with mįµ¢ + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ MegatronPolicyWorker[rank=i] (Ɨ8) ─────────────────────────────────┐ +│ ā‘£ kv_batch_get(keys=mįµ¢.keys, select=[input_ids, token_mask, ...]) │ +│ forward → prev_logprobs │ +│ ⑤ leader-only: kv_batch_put(keys=mįµ¢.keys, fields={prev_logprobs:T}) ── PHASE 1│ +│ │ +│ ā‘„ kv_batch_get(...) → ref_logprobs │ +│ ⑦ leader-only: kv_batch_put({reference_policy_logprobs:T}) ── PHASE 2│ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”˜ + │ + ā”Œā”€ DRIVER (small slice work, never bulk) ──────────────────┐ │ + │ ā‘§ read_columns(meta, select=[token_logprobs, rewards]) ā”‚ā—„ā”€ā”€ā”€ā”˜ + │ compute advantages (vectorized, on driver, tiny) │ + │ ⑨ write_columns(meta, {advantages: T}) │ + │ │ + │ [optional] dynamic_sampling: meta.subset(...) 
│ + │ [optional] kv_clear(dropped_keys) │ + ā””ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ shard_meta_for_dp again, Ray-call per rank + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ MegatronPolicyWorker[rank=i] (Ɨ8) ─────────────────────────────────┐ +│ ā‘© kv_batch_get(select=[input_ids, prev_logprobs, ref_lp, advantages, masks]) │ +│ loss → grad → optimizer.step() │ +│ (no write-back: training is terminal for this partition) │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”˜ + │ + ā”Œā”€ DRIVER (step-end housekeeping) ─────────────────────────┐ │ + │ ⑪ kv_batch_get(select=[input_ids]) ← stash for log_data ā”‚ā—„ā”€ā”€ā”€ā”˜ + │ ā‘« kv_clear(keys=meta.keys, partition_id=pid) │ + ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + + (next step → ā‘  again with a fresh partition_id) +``` + +Mental model: **TQ is the bus, not a database.** It holds bulk between stages +of one step, then `kv_clear` drops it. Driver only handles small per-sample +slices; workers handle bulk via TQ. + +--- + +## 4. Call counts per step + +Steady state on the validation run (32 samples, 8 GPUs, no PP/TP): + +| TQ call | Site | Count / step | Payload | +|----------------------------|---------------------|-------------:|--------------------------------| +| `register_partition` | driver | 1 | metadata only | +| `kv_batch_put` (rollout) | SyncRolloutActor | 1 | full bulk (~600 KB; GBs at scale) | +| `shard_meta_for_dp` | driver | 3 | no I/O | +| `kv_batch_get` (lp inputs) | workers | 8 (per DP) | input slice | +| `kv_batch_put` (lp out) | workers (leader) | 1 | prev_logprobs delta | +| `kv_batch_get` (ref input) | workers | 8 | input slice | +| `kv_batch_put` (ref out) | workers (leader) | 1 | ref_logprobs delta | +| `kv_batch_get` (adv slice) | driver | 1 | small (rewards + token_lp) | +| `kv_batch_put` (advantages)| driver | 1 | small delta | +| `kv_batch_get` (train) | workers | 8 | full slice | +| `kv_batch_get` (log_data) | driver | 1 | input_ids only | +| `kv_clear` | driver | 1 | drop | + +Total: ~31 TQ RPCs / step. 16 of those are the per-DP fetch fan-out +(3 phases Ɨ 8 ranks āˆ’ overlaps). + +--- + +## 5. Concrete examples + +**Rollout produces (only first-write):** +```python +meta = kv_first_write( + final_batch_cpu=batch, + uids=[f"step{step}_p{i}" for i in range(num_prompts)], + dp_client=policy.dp_client, + partition_id=f"grpo_step_{step}", +) +# meta.keys = ["step17_p0_g0", "step17_p0_g1", ..., "step17_p7_g3"] +# meta.fields = ["input_ids", "input_lengths", "generation_logprobs", +# "token_mask", "sample_mask", ...] 
+``` + +**Driver appends a column (small delta, no bulk):** +```python +slice_ = read_columns(client, meta, select_fields=["token_logprobs", "rewards"]) +advantages = compute_advantages(slice_) # tiny driver compute +write_columns(client, meta, {"advantages": advantages}) +``` + +**Worker fan-out (driver):** +```python +shards = shard_meta_for_dp(meta, dp_world=8) +ray.get([ + worker[i].train_from_meta.remote(shards[i]) + for i in range(8) +]) +``` + +**Worker fetch + leader write-back (in `base_policy_worker._write_back`):** +```python +inputs = read_columns(self._dp_client, meta, select_fields=LP_SEED_FIELDS) +prev_lp = self.forward(inputs) +if self._is_replica_leader(): + write_columns(self._dp_client, meta, {"prev_logprobs": prev_lp}) +``` + +**Step-end teardown:** +```python +log_input_ids = read_columns(client, meta, select_fields=["input_ids"]) +client.kv_clear(keys=meta.keys, partition_id=meta.partition_id) +``` + +--- + +## 6. High-level comparison with verl + +verl's TQ-aware trainer lives in +`verl/verl/trainer/main_ppo_sync.py`. Same TQ primitive (`tq.kv_batch_put` / +`kv_batch_get` / `kv_clear`), but a different *integration shape*: + +| Dimension | verl (`main_ppo_sync.py`) | nemo-rl (sync 1-hop) | +|------------------------|----------------------------------------------------------|---------------------------------------------------| +| API surface | `tq.*` module functions | `DataPlaneClient` ABC, swappable adapters | +| Init | `tq.init()` once globally | `register_partition` per step | +| Generation actor | Per-prompt async `AgentLoopWorkerTQ`s; each writes when its agent loop finishes | One batched `SyncRolloutActor`; single put after all generations done | +| Producer→consumer signal | Tags (`{"global_steps": N, "status": "success"}`) polled by `ReplayBuffer` background thread | Controller-side `production_status` bit; consumers wait on field production | +| Step gate | `ReplayBuffer.sample()` blocks until all prompts of `global_steps` are tagged success | Rollout actor's `ray.get()` returns only when entire batch done | +| Driver-side compute | Driver pulls **bulk** (full input_ids + response_mask) for `_compute_old_log_prob`, `_compute_values`, `_compute_advantage` | Driver only touches **small slices** (advantages-input, log_data) | +| Worker fan-out | Workers receive full meta, do their own internal sharding | Driver `shard_meta_for_dp` fan-out, workers receive pre-sliced meta | +| Async API | `tq.async_kv_batch_put` used at agent-loop tail | Sync only (deliberately simplified — see §1.2 of integration plan) | +| Multi-policy | actor + critic + ref split, each writes back | actor + ref only (GRPO has no critic) | + +### What verl does that we don't (yet) + +1. **Per-prompt async generation.** verl's `AgentLoopWorkerTQ` writes to TQ + as each agent loop finishes. First finishers can in principle pipeline + into logprob compute earlier. We currently wait for the whole rollout + actor batch. Tracked under the async-RL plan; not on the sync 1-hop + critical path. +2. **`ReplayBuffer` pattern.** Useful for async RL where rollouts may produce + out-of-order vs training steps. Deferred to PR-async; sync 1-hop has + exact step alignment so we don't need it. +3. **Tag-based progress signal.** Simpler than the consumer-counter for + cross-step resumability. We can revisit if/when we need crash recovery. + +### What we do that verl doesn't + +1. **`DataPlaneClient` ABC.** verl is pinned to one TQ implementation; we + can swap (R: integration plan G2). 
Worth it because the field is + moving (mooncake_cpu, nv-dataplane). +2. **`shard_meta_for_dp`.** verl workers receive full meta and shard + internally; we shard on the driver because Megatron's + `shard_by_batch_size` requires `bin_count_multiple=DP_world` to avoid + deadlocks at the first cross-DP collective when sequence-packing + bin counts vary per rank. +3. **Driver-slice-only pattern.** verl pulls full batches into the driver + for compute_advantages/values; that scales poorly at long-context + (1–5 GB / step at 8k–32k seq) since the driver becomes a single-node + serialization bottleneck. We touch only small slices on the driver. +4. **Helper layer (`kv_first_write` / `read_columns` / `write_columns`).** + verl inlines the `kv_batch_get → process → kv_batch_put` pattern at + each call site. We extracted it because the same pattern repeats 5+ + times and we want one place to validate dtype / shape / key invariants. + +### TL;DR + +The two implementations are *primitive-compatible* (same `kv_batch_*` +calls, same key lifecycle, same `KVBatchMeta` shape) but +*integration-shape different*: + +- **verl** treats TQ as a stage queue with a polling replay buffer in + front of it; generation is per-prompt async; the driver still touches + bulk in some compute phases. +- **nemo-rl sync 1-hop** treats TQ as a sample-keyed dataframe; generation + is one batched actor; the driver only ever sees small slices. + +Both are correct; the cost differential at scale comes from how much +data flows through the driver. + +--- + +## 7. Performance characterization (this run) + +End-to-end parity vs the legacy driver-bulk path +(`grpo-run-a-legacy-v2.log`): + +- Steps 1–7 are bit-exact (loss + reward); divergence afterward is the + expected stochastic drift from accumulated policy updates. +- Steady-state step time: **+0.21 s** (1-hop 7.86 s vs legacy 7.65 s, + ~3 %). +- Per-phase breakdown (steady state, steps 2–19): + +| Phase | v4 (1-hop) | Legacy | Ī” | +|-------------------------------|-----------:|---------:|-----------:| +| Total step time | 7.606 s | 7.393 s | **+0.213 s** | +| policy_training | 0.596 s | 0.567 s | +0.028 s | +| generation | 1.502 s | 1.528 s | āˆ’0.027 s | +| policy_and_ref_logprob | 1.588 s | 1.448 s | **+0.141 s** | +| residual (driver bookkeeping) | 3.920 s | 3.850 s | +0.070 s | + +**The +0.21 s overhead is entirely TQ RPC roundtrip cost in the logprob +phase** (two worker calls Ɨ one fetch + one write each). Generation and +training are unchanged. + +### Crossover scale (where TQ wins) + +TQ overhead is mostly latency-bound (~constant per step), while legacy +driver fan-out is bandwidth-bound (scales with batch tensor volume Ɨ DP +fan-out). 
Mental model: + +- Legacy driver overhead ā‰ˆ ~5 ms/MB Ɨ (4 full-batch transfers per step) Ɨ DP-fan-out +- TQ overhead ā‰ˆ ~200 ms fixed (after fuse-and-overlap optimization: ~100 ms) + +Crossover when batch volume Ɨ DP fan-out Ɨ ~20 ms/MB ≄ TQ fixed cost: + +| Scale | Batch / step | DP ranks | Legacy cost | Winner | +|------------------------------------------|-------------:|---------:|------------:|-------------------------| +| Toy (this run, 1B, 512 tok, BS 32) | 0.6 MB | 8 | ~50 ms | **legacy +0.21 s** | +| Small prod (8B, 1k tok, BS 256) | ~10 MB | 8 | ~300 ms | **roughly tied** | +| Mid prod (70B, 4k tok, BS 1024) | ~250 MB | 32 | ~5–10 s | **TQ wins decisively** | +| Long-context (8k–32k seq, GRPO 16 gens) | 1–5 GB | 64+ | tens of s | **TQ wins decisively** | + +Rough crossover: **~10 MB / step / DP-rank of effective batch volume**. +Long sequences, more generations per prompt, and more DP ranks all push +the needle hard toward TQ. + +### Cheapest optimizations + +1. **Fuse `get_logprobs` + `get_reference_policy_logprobs` into one worker + call** — saves ~70 ms (one TQ input-fetch). Brings overhead from + +0.21 s → ~+0.14 s. +2. **Overlap TQ write-back with next-phase fetch** — saves another + ~30–50 ms. Combined: ~+0.10 s overhead, effectively at parity. + +Both are clean refactors inside `tq_policy.py` / `base_policy_worker.py` +and don't touch `grpo_sync.py`. Not on the critical path; flag for the +next data-plane optimization round. + +--- + +## 8. Where to look in the code + +| Concern | File | +|----------------------------------|---------------------------------------------------------------| +| Stable boundary | `nemo_rl/data_plane/interfaces.py` | +| Adapter (TransferQueue impl) | `nemo_rl/data_plane/adapters/transfer_queue.py` | +| Driver-side helpers | `nemo_rl/data_plane/driver_io.py` (`read_columns`, `write_columns`) | +| First-write helper | `nemo_rl/algorithms/sync_utils.py` | +| Rollout actor | `nemo_rl/algorithms/sync_utils.py` | +| DP-rank meta sharding | `nemo_rl/data_plane/preshard.py` | +| Worker fetch + write-back | `nemo_rl/models/policy/workers/base_policy_worker.py` | +| TQ-aware policy facade | `nemo_rl/models/policy/tq_policy.py` | +| End-to-end orchestration | `nemo_rl/algorithms/grpo_sync.py` | +| Unit tests | `tests/data_plane/unit/` | +| Design | `research/data_plane_integration_plan.md` §1.2 | diff --git a/nemo_rl/data_plane/factory.py b/nemo_rl/data_plane/factory.py new file mode 100644 index 0000000000..86b5a94481 --- /dev/null +++ b/nemo_rl/data_plane/factory.py @@ -0,0 +1,67 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Single entrypoint that maps a :class:`DataPlaneConfig` to a client.""" + +from __future__ import annotations + +from nemo_rl.data_plane.interfaces import DataPlaneClient, DataPlaneConfig + + +def build_data_plane_client( + cfg: DataPlaneConfig | None, *, bootstrap: bool = True +) -> DataPlaneClient: + """Construct the configured data-plane client. 
+ + Dispatches on ``cfg["impl"]``. Only ``"transfer_queue"`` ships today; + other adapters can be added behind this factory without touching + call sites. Raises if data_plane is disabled — the legacy trainer + (``nemo_rl.algorithms.grpo.grpo_train``) should be used in that case + rather than a NoOp fallback here. + + Args: + cfg: Data-plane config; must have ``enabled=True``. + bootstrap: ``True`` on the driver — bootstraps the TQ + controller. ``False`` on worker processes — connects to the + existing controller (avoids creating a second named actor). + + Returns: + A configured ``DataPlaneClient``; wrapped in + :class:`MetricsDataPlaneClient` when observability is enabled. + """ + if cfg is None or not cfg["enabled"]: + raise ValueError( + "build_data_plane_client called with data_plane disabled. " + "Use the legacy nemo_rl.algorithms.grpo.grpo_train trainer " + "(which never engages the data plane) for that case." + ) + + impl = cfg["impl"] + if impl == "transfer_queue": + from nemo_rl.data_plane.adapters.transfer_queue import TQDataPlaneClient + + client: DataPlaneClient = TQDataPlaneClient(cfg, bootstrap=bootstrap) + else: + raise ValueError(f"unknown data_plane impl: {impl!r}") + + obs = cfg.get("observability") or {} + if obs.get("enabled", False): + from nemo_rl.data_plane.observability import ( + MetricsDataPlaneClient, + log_event, + ) + + on_event = obs.get("callback") or log_event + # pyrefly: obs.get returns Any, can't narrow to the expected callback type. + client = MetricsDataPlaneClient(client, on_event=on_event) # type: ignore[bad-argument-type] + return client diff --git a/nemo_rl/data_plane/interfaces.py b/nemo_rl/data_plane/interfaces.py new file mode 100644 index 0000000000..ba743e7525 --- /dev/null +++ b/nemo_rl/data_plane/interfaces.py @@ -0,0 +1,353 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Stable boundary between NeMo-RL and data-plane implementations. + +Wire shape adapters must support: + * ``fields``: ``TensorDict`` with tensor leaves AND optional + ``NonTensorStack`` / ``NonTensorData`` leaves (TQ-native non-tensor + passthrough). TQ's storage backends handle encoding per backend + (simple keeps Python objects; mooncake_client pickles internally). + * ``tags``: ``list[dict[str, Any]]`` per-sample primitives (kept + separate from ``fields`` so non-tensor metadata like + ``input_lengths`` doesn't pollute the leaf-level schema). + * ``keys``: per-sample string uids. + * ``partition_id``: string-named address spaces with declared + ``consumer_tasks`` and ``fields`` schemas. + +All call sites in ``nemo_rl/algorithms``, ``nemo_rl/experience`` and +``nemo_rl/models`` go through :class:`DataPlaneClient` — never +``import transfer_queue`` directly. This is what makes the +implementation swappable. + +See ``nemo_rl/data_plane/README.md`` for the full design. 
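+
+A rough driver-side sequence (``cfg``, ``fields``, ``keys``, ``td``, ``N``
+and ``G`` are placeholders; the step-by-step ordering lives in
+``nemo_rl/data_plane/docs/data-plane-api-lifecycle.md``)::
+
+    client = build_data_plane_client(cfg)   # nemo_rl.data_plane.factory
+    client.register_partition("grpo_step_17", fields, num_samples=N * G,
+                              consumer_tasks=["lp", "ref", "train"])
+    meta = client.kv_batch_put(keys, "grpo_step_17", fields=td)
+    out = client.kv_batch_get(keys, "grpo_step_17", select_fields=["input_ids"])
+    client.kv_clear(keys, "grpo_step_17")
+    client.close()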
+""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Any, Callable, Literal, NotRequired, Sequence, TypedDict + +from tensordict import TensorDict + + +class DataPlaneConfig(TypedDict): + """Feature-gated config; defaults to disabled. + + ``backend`` is the storage backend *inside* TransferQueue; it is owned by + the TQ adapter, not by NeMo-RL. ``impl`` selects which adapter we go + through. + """ + + enabled: bool + impl: Literal["transfer_queue"] + backend: NotRequired[Literal["simple", "mooncake_cpu"]] + controller_address: NotRequired[str] + storage_capacity: NotRequired[int] + num_storage_units: NotRequired[int] + claim_meta_poll_interval_s: NotRequired[float] + ack_timeout_ms: NotRequired[int] + observability: NotRequired["ObservabilityConfig"] + + +class ObservabilityConfig(TypedDict): + """Optional middleware that records per-op metrics on the client. + + Off by default. When ``enabled=True`` the factory wraps the chosen + adapter with :class:`MetricsDataPlaneClient`. ``callback`` is + injected programmatically (callables don't round-trip through + YAML) — set ``cfg["observability"]["callback"] = my_fn`` before + :func:`build_data_plane_client` to plug into wandb / file / log. + Default callback prints one line per op for debug. + """ + + enabled: bool + callback: NotRequired[Callable[[dict[str, Any]], None]] + + +@dataclass +class KVBatchMeta: + """1:1 mirror of ``transfer_queue.metadata.KVBatchMeta``. + + Attribute names match TransferQueue exactly so the adapter does not need + a rename layer and TQ's own ``select_fields`` validation works against + our object unmodified. + + Two roles: + * Result type returned by :meth:`DataPlaneClient.claim_meta` — callers + extract ``.keys`` / ``.partition_id`` and pass them to + :meth:`kv_batch_get` / :meth:`get_data`. + * Argument type for the per-DP-rank fetch entrypoints. + ``sequence_lengths`` lets the driver compute a balanced per-rank + shard from metadata only (control plane), without ever + materializing tensor data. + """ + + partition_id: str + task_name: str | None + keys: list[str] + fields: list[str] | None = None + sequence_lengths: list[int] | None = None + extra_info: dict[str, Any] = field(default_factory=dict) + + @property + def size(self) -> int: + return len(self.keys) + + # ── Pure-metadata transforms (no I/O) ────────────────────────────── + # Used by dynamic_sampling on the meta path: filter zero-std rows + # (subset), accumulate survivors across iterations (concat), trim + # an over-full cache to the training batch size (slice). Each + # returns a fresh KVBatchMeta — caller is responsible for kv_clear- + # ing any uids dropped from the working set. 
+ + def _replace( + self, + *, + keys: list[str], + sequence_lengths: list[int] | None, + ) -> "KVBatchMeta": + """Return a copy with new keys/sequence_lengths, same metadata otherwise.""" + return KVBatchMeta( + partition_id=self.partition_id, + task_name=self.task_name, + keys=list(keys), + fields=self.fields, + sequence_lengths=list(sequence_lengths) + if sequence_lengths is not None + else None, + extra_info=dict(self.extra_info or {}), + ) + + def subset(self, indices: "Sequence[int]") -> "KVBatchMeta": + """Return a new meta with only the rows at ``indices`` (any order).""" + return self._replace( + keys=[self.keys[i] for i in indices], + sequence_lengths=( + [self.sequence_lengths[i] for i in indices] + if self.sequence_lengths is not None + else None + ), + ) + + def slice(self, start: int, stop: int) -> "KVBatchMeta": + """Return a new meta with rows in the contiguous range ``[start, stop)``.""" + return self._replace( + keys=self.keys[start:stop], + sequence_lengths=( + self.sequence_lengths[start:stop] + if self.sequence_lengths is not None + else None + ), + ) + + def concat(self, *others: "KVBatchMeta") -> "KVBatchMeta": + """Append ``others`` to ``self``. All metas must share ``partition_id``.""" + if any(o.partition_id != self.partition_id for o in others): + raise ValueError("KVBatchMeta.concat: partition_ids must match") + all_m = (self, *others) + keys = [k for m in all_m for k in m.keys] + all_have_lens = all(m.sequence_lengths is not None for m in all_m) + seq_lens = ( + [s for m in all_m for s in (m.sequence_lengths or [])] + if all_have_lens + else None + ) + return self._replace(keys=keys, sequence_lengths=seq_lens) + + +class DataPlaneClient(ABC): + """Stable, swappable data-plane boundary. + + The methods are split into three groups by intent. Argument order + mirrors the underlying ``transfer_queue`` API 1:1 so a future adapter + (e.g. ``nv-dataplane``) is a thin pass-through too. + + A. *Task-mediated* — used by stages that wait for upstream production + via the per-task consumer counter: + :meth:`register_partition`, :meth:`claim_meta`, :meth:`get_data`, + :meth:`check_consumption_status`. + B. *Direct-by-key* — used by stages that already know the exact uids + (e.g. driver-side fan-out to DP ranks): + :meth:`kv_batch_put`, :meth:`kv_batch_get`, :meth:`kv_clear`. + C. *Lifecycle* — :meth:`close`. + + Stage-completion signal: there is intentionally no ``mark_consumed``. + The authoritative signal in TransferQueue is *field production* — + when a stage calls :meth:`kv_batch_put` for a new field, the controller + flips ``production_status[sample, field] = 1``. Downstream consumers + waiting on that field only see those samples once produced. + """ + + # ── (A) task-mediated ─────────────────────────────────────────────── + + @abstractmethod + def register_partition( + self, + partition_id: str, + fields: list[str], + num_samples: int, + consumer_tasks: list[str], + grpo_group_size: int | None = None, + enums: dict[str, list[str]] | None = None, + ) -> None: + """Declare the partition schema and consumer tasks. + + Args: + partition_id: Partition name. + fields: Superset of fields any producer may write here. + num_samples: Expected total samples; sizes controller arrays. + consumer_tasks: Named tasks; each gets its own consumption cursor. + grpo_group_size: Group size for GRPO balanced sampling. + enums: Per-field fixed-vocab string codec, shipped once at register. 
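+
+        Example (values are illustrative, mirroring step ā‘  of
+        ``docs/data-plane-api-lifecycle.md``)::
+
+            client.register_partition(
+                partition_id="grpo_step_17",
+                fields=["input_ids", "generation_logprobs", "advantages"],
+                num_samples=num_prompts * num_generations,
+                consumer_tasks=["lp", "ref", "train"],
+            )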
+ """ + + @abstractmethod + def claim_meta( + self, + partition_id: str, + task_name: str, + required_fields: list[str], + batch_size: int, + dp_rank: int | None = None, + blocking: bool = True, + timeout_s: float = 60.0, + ) -> KVBatchMeta: + """Discover and **claim** up to ``batch_size`` ready samples. + + Advances ``task_name``'s per-sample consumption cursor (TQ's + ``mode='fetch'``); claimed uids won't be returned again. Samples + stay readable via :meth:`kv_batch_get` until :meth:`kv_clear`. + + Args: + partition_id: Partition to claim from. + task_name: Consumer task whose cursor is advanced. + required_fields: Fields that must be produced for a sample to be claimable. + batch_size: Max samples to claim. + dp_rank: Reserved; driver-side balancing via :func:`shard_meta_for_dp` is used today. + blocking: Block until the batch can be claimed. + timeout_s: Max blocking time before raising. + + Returns: + ``KVBatchMeta`` for the claimed batch; pass to :meth:`get_data`. + """ + + @abstractmethod + def get_data( + self, + meta: KVBatchMeta, + select_fields: list[str] | None = None, + ) -> TensorDict: + """Resolve a meta to tensor data. + + Field-set resolution: (1) explicit ``select_fields``; (2) + ``meta.fields`` if non-None; (3) *fail loudly* — never silently + fetch all fields. + + Args: + meta: From :meth:`claim_meta` or hand-built with explicit keys. + select_fields: Subset of fields to fetch. + + Returns: + ``TensorDict`` keyed by field name, batched along ``meta.keys``. + """ + + @abstractmethod + def check_consumption_status( + self, partition_id: str, task_names: list[str] + ) -> bool: + """True iff every task has consumed all samples in the partition. + + Authoritative across workers — uses TQ's controller-side counter, + not the per-process client cache. + + Args: + partition_id: Partition to check. + task_names: Tasks whose consumption cursors are inspected. + + Returns: + ``True`` iff every task in ``task_names`` has consumed all samples. + """ + + # ── (B) direct-by-key (TQ-aligned signatures) ────────────────────── + + @abstractmethod + def kv_batch_put( + self, + keys: list[str], + partition_id: str, + fields: TensorDict | None = None, + tags: list[dict[str, Any]] | None = None, + ) -> KVBatchMeta: + """Write fields for ``keys`` — the producer entrypoint. + + Writing a field flips the controller's ``production_status`` bit + for ``(sample, field)``; that flip is the "stage finished" signal + downstream consumers wait on. Tensor and ``NonTensorStack`` leaves + both pass through to TQ; non-tensor encoding is per-backend. + + Args: + keys: Per-sample uids being written. + partition_id: Partition these keys belong to. + fields: Tensor / ``NonTensorStack`` leaves to write. + tags: Optional per-sample primitive metadata. + + Returns: + ``KVBatchMeta`` covering ``keys`` — usable for direct :meth:`kv_batch_get`. + """ + + @abstractmethod + def kv_batch_get( + self, + keys: list[str], + partition_id: str, + select_fields: list[str], + ) -> TensorDict: + """Direct fetch by uids. + + Used by per-DP-rank slice fetches. Does NOT advance any per-task + consumption cursor — that only happens via :meth:`claim_meta`. + + ``select_fields`` is required (no implicit "fetch every field" + fallback): bulk schemas are wide and silent over-fetch is the + most expensive shape the wire can take. Callers must name what + they read. + + Args: + keys: Uids to fetch. + partition_id: Partition the keys live in. + select_fields: Subset of fields to fetch. 
+ + Returns: + ``TensorDict`` keyed by field name, batched along ``keys``. + """ + + @abstractmethod + def kv_clear( + self, + keys: list[str] | None, + partition_id: str, + ) -> None: + """Drop key-value pairs. + + Args: + keys: Uids to drop; ``None`` clears the whole partition. + partition_id: Partition the keys live in. + """ + + # ── (C) lifecycle ────────────────────────────────────────────────── + + @abstractmethod + def close(self) -> None: + """Release controller / storage handles. Idempotent.""" diff --git a/nemo_rl/data_plane/observability.py b/nemo_rl/data_plane/observability.py new file mode 100644 index 0000000000..0af6348afa --- /dev/null +++ b/nemo_rl/data_plane/observability.py @@ -0,0 +1,339 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Lean per-op metrics decorator for ``DataPlaneClient``. + +Wraps any ``DataPlaneClient`` and invokes a single user-provided +callback on each operation. Each event is a flat dict:: + + {"op", "partition_id", "n_keys", "n_bytes", "wall_ms", "status"} + +Plug wandb / file logging / debug print at the call site by passing +``on_event=``. ``snapshot()`` returns cumulative +totals **plus** live memory consumption: ``bytes_outstanding`` (sum of +bytes currently held in TQ, i.e. put minus cleared) and +``peak_bytes_outstanding`` (high-water mark over the run lifetime). +""" + +from __future__ import annotations + +import logging +from dataclasses import asdict, dataclass +from time import monotonic +from typing import Any, Callable, Literal, TypedDict + +EventStatus = Literal["ok", "error", "timeout"] + + +class DataPlaneEvent(TypedDict): + op: str + partition_id: str + n_keys: int + n_bytes: int + wall_ms: float + status: EventStatus + + +import torch +from tensordict import TensorDict + +from nemo_rl.data_plane.interfaces import DataPlaneClient, KVBatchMeta + +logger = logging.getLogger(__name__) + + +def _td_bytes(td: TensorDict | None) -> int: + if td is None: + return 0 + total = 0 + for k in td.keys(include_nested=True, leaves_only=True): + v = td.get(k) + if not isinstance(v, torch.Tensor): + continue + t = v.values() if v.is_nested else v + total += t.numel() * t.element_size() + return total + + +def log_event(event: DataPlaneEvent) -> None: + logger.info("data_plane_event: %s", event) + + +@dataclass +class DataPlaneStats: + total_bytes: int = 0 + total_keys: int = 0 + total_ops: int = 0 + bytes_outstanding: int = 0 + peak_bytes_outstanding: int = 0 + # Anomaly trackers — a wire-format regression that bloats bytes per + # row (cf. message_log view-aliasing pickle bug) shows up as a + # sudden spike in ``max_bytes_per_key_seen``. 
+ max_bytes_per_key_seen: int = 0 + last_put_bytes_per_key: int = 0 + + +class MetricsDataPlaneClient(DataPlaneClient): + """Wrap a ``DataPlaneClient`` with a per-op callback hook.""" + + def __init__( + self, + inner: DataPlaneClient, + on_event: Callable[[DataPlaneEvent], None] | None = None, + ) -> None: + self._inner = inner + self._on_event = on_event or (lambda _: None) + self._stats = DataPlaneStats() + # Nested per-partition / per-key live byte counts. Populated on + # successful ``kv_batch_put``; popped on successful ``kv_clear``. + # Bounded by the live key population, not cumulative traffic. + self._bytes_by_partition: dict[str, dict[str, int]] = {} + + def snapshot(self) -> dict[str, Any]: + """Return cumulative totals plus live byte / key outstanding counts.""" + out = asdict(self._stats) + out["n_keys_outstanding"] = sum( + len(d) for d in self._bytes_by_partition.values() + ) + return out + + def bytes_outstanding_by_partition(self) -> dict[str, int]: + """Per-partition breakdown of currently-held bytes.""" + return {p: sum(d.values()) for p, d in self._bytes_by_partition.items()} + + def _record_put(self, partition_id: str, keys: list[str], n_bytes: int) -> None: + """Attribute put bytes per key so a later ``kv_clear`` can subtract. + + Called after the underlying RPC succeeds so a failed put never + leaves the accounting inflated. + + Args: + partition_id: Partition the keys were written to. + keys: Per-sample uids that were written. + n_bytes: Total bytes written; distributed evenly across keys. + """ + if not keys or n_bytes <= 0: + return + per_key, remainder = divmod(n_bytes, len(keys)) + partition_dict = self._bytes_by_partition.setdefault(partition_id, {}) + for i, key in enumerate(keys): + share = per_key + (1 if i < remainder else 0) + partition_dict[key] = partition_dict.get(key, 0) + share + self._stats.bytes_outstanding += n_bytes + if self._stats.bytes_outstanding > self._stats.peak_bytes_outstanding: + self._stats.peak_bytes_outstanding = self._stats.bytes_outstanding + + def _record_clear(self, partition_id: str, keys: list[str] | None) -> None: + """Reverse the put accounting for ``keys``. + + Called after the underlying RPC succeeds so a failed clear keeps + the accounting consistent with TQ's actual state. + + Args: + partition_id: Partition the keys were dropped from. + keys: Uids dropped; ``None`` means the whole partition was cleared. + """ + partition_dict = self._bytes_by_partition.get(partition_id) + if partition_dict is None: + return + if keys is None: + freed = sum(partition_dict.values()) + del self._bytes_by_partition[partition_id] + else: + freed = 0 + for key in keys: + freed += partition_dict.pop(key, 0) + if not partition_dict: + del self._bytes_by_partition[partition_id] + self._stats.bytes_outstanding -= freed + + def _run( + self, + op: str, + partition_id: str, + fn: Callable[[], Any], + *, + n_keys: int = 0, + n_bytes: int = 0, + ) -> Any: + """Run ``fn`` and emit one observability event with wall-time and status. + + Args: + op: Operation tag (``"put"``, ``"get"``, ``"clear"``, etc.). + partition_id: Partition the op targets. + fn: Zero-arg callable that invokes the inner client. + n_keys: Key count if known up front; otherwise inferred from + the return value (``KVBatchMeta.keys``). + n_bytes: Byte estimate; overridden by ``_td_bytes`` when the + return is a ``TensorDict``. + + Returns: + Whatever ``fn`` returned. 
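+
+        Example (this is just the ``get_data`` wrapper below, shown for
+        orientation)::
+
+            return self._run(
+                "get_data",
+                meta.partition_id,
+                lambda: self._inner.get_data(meta, select_fields=select_fields),
+                n_keys=len(meta.keys),
+            )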
+ """ + t0 = monotonic() + try: + out = fn() + except TimeoutError: + self._emit(op, partition_id, n_keys, n_bytes, t0, "timeout") + raise + except Exception: + self._emit(op, partition_id, n_keys, n_bytes, t0, "error") + raise + # If the call returns a TensorDict, the read-side bytes are more + # informative than the input estimate. + if isinstance(out, TensorDict): + n_bytes = _td_bytes(out) + elif isinstance(out, KVBatchMeta) and not n_keys: + n_keys = len(out.keys) + self._emit(op, partition_id, n_keys, n_bytes, t0, "ok") + return out + + def _emit( + self, + op: str, + partition_id: str, + n_keys: int, + n_bytes: int, + t0: float, + status: EventStatus, + ) -> None: + event: DataPlaneEvent = { + "op": op, + "partition_id": partition_id, + "n_keys": int(n_keys), + "n_bytes": int(n_bytes), + "wall_ms": (monotonic() - t0) * 1000.0, + "status": status, + } + self._on_event(event) + if status == "ok": + self._stats.total_bytes += n_bytes + self._stats.total_keys += n_keys + self._stats.total_ops += 1 + if op == "put" and n_keys: + per_key = n_bytes // n_keys + self._stats.last_put_bytes_per_key = per_key + if per_key > self._stats.max_bytes_per_key_seen: + self._stats.max_bytes_per_key_seen = per_key + + def register_partition( + self, + partition_id, + fields, + num_samples, + consumer_tasks, + grpo_group_size=None, + enums=None, + ): + self._run( + "register", + partition_id, + lambda: self._inner.register_partition( + partition_id, + fields, + num_samples, + consumer_tasks, + grpo_group_size=grpo_group_size, + enums=enums, + ), + n_keys=int(num_samples), + ) + + def claim_meta( + self, + partition_id, + task_name, + required_fields, + batch_size, + dp_rank=None, + blocking=True, + timeout_s=60.0, + ): + return self._run( + "claim_meta", + partition_id, + lambda: self._inner.claim_meta( + partition_id, + task_name, + required_fields, + batch_size, + dp_rank=dp_rank, + blocking=blocking, + timeout_s=timeout_s, + ), + ) + + def get_data(self, meta, select_fields=None): + return self._run( + "get_data", + meta.partition_id, + lambda: self._inner.get_data(meta, select_fields=select_fields), + n_keys=len(meta.keys), + ) + + def check_consumption_status(self, partition_id, task_names): + return self._run( + "check_consumption_status", + partition_id, + lambda: self._inner.check_consumption_status(partition_id, task_names), + ) + + def kv_batch_put(self, keys, partition_id, fields=None, tags=None): + n_bytes = _td_bytes(fields) + # Materialize keys once: ``_run`` consumes its lambda and we + # also need to attribute bytes per key after success. 
+ keys_list = keys if isinstance(keys, list) else list(keys) + out = self._run( + "put", + partition_id, + lambda: self._inner.kv_batch_put( + keys_list, + partition_id, + fields=fields, + tags=tags, + ), + n_keys=len(keys_list), + n_bytes=n_bytes, + ) + self._record_put(partition_id, keys_list, n_bytes) + return out + + def kv_batch_get(self, keys, partition_id, select_fields): + return self._run( + "get", + partition_id, + lambda: self._inner.kv_batch_get( + keys, + partition_id, + select_fields=select_fields, + ), + n_keys=len(keys), + ) + + def kv_clear(self, keys, partition_id): + keys_list = keys if (keys is None or isinstance(keys, list)) else list(keys) + n_keys = len(keys_list) if keys_list is not None else 0 + self._run( + "clear", + partition_id, + lambda: self._inner.kv_clear(keys_list, partition_id), + n_keys=n_keys, + ) + self._record_clear(partition_id, keys_list) + + def close(self) -> None: + self._run( + "close", + "", + lambda: self._inner.close(), + ) diff --git a/nemo_rl/data_plane/preshard.py b/nemo_rl/data_plane/preshard.py new file mode 100644 index 0000000000..c610870935 --- /dev/null +++ b/nemo_rl/data_plane/preshard.py @@ -0,0 +1,164 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Driver-side balanced packing + per-rank fan-out helpers. + +Shared by sync and async data-plane trainers. Operates on full +``BatchedDataDict``s and relies on ``shard_by_batch_size``'s +``bin_count_multiple=DP_world`` behavior to keep per-rank microbatch +counts uniform — without that, sequence packing / dynamic batching +produce variable per-rank bin counts and Megatron deadlocks at the +first cross-DP collective. +""" + +from __future__ import annotations + +from typing import Any, Optional + +import torch + +from nemo_rl.data_plane.interfaces import KVBatchMeta +from nemo_rl.data_plane.schema import ( + ELEM_COUNTS_PER_GB, + INPUT_IDS, + INPUT_LENGTHS, + META_IDX, + MICRO_BATCH_INDICES, + MICRO_BATCH_LENGTHS, + SAMPLE_MASK, +) +from nemo_rl.distributed.batched_data_dict import BatchedDataDict + + +def shard_meta_for_dp( + meta: KVBatchMeta, + *, + dp_world: int, + batch_size: Optional[int] = None, + sequence_packing_args: Optional[dict[str, Any]] = None, + dynamic_batching_args: Optional[dict[str, Any]] = None, +) -> tuple[list[KVBatchMeta], Optional[list[int]]]: + """Pure key-list split: assign ``meta.keys`` to ``dp_world`` ranks. + + Seq-len-aware on top of ``shard_by_batch_size``. No I/O, no key + minting. Used for every dispatch after rollout (logprob, ref-logprob, + train); the rollout actor's first write goes through + :func:`nemo_rl.experience.sync_rollout_actor.kv_first_write` directly. + + Per-rank packing metadata (``micro_batch_indices`` / + ``micro_batch_lengths`` / ``elem_counts_per_gb``) is set in each + shard's ``extra_info`` so the ``*_presharded`` worker can reattach + packing as it does on the legacy fan-out path. + + Args: + meta: Full-batch ``KVBatchMeta`` with ``sequence_lengths`` populated. 
+ dp_world: Number of DP ranks. + batch_size: Total samples; ``None`` for the logprob path, GBS for train. + sequence_packing_args: Packing config dict for ``shard_by_batch_size``. + dynamic_batching_args: Dynamic-batching config dict; mutually exclusive with the above. + + Returns: + ``(per_rank_metas, unsorted_indices)``. ``unsorted_indices`` is + the inverse permutation that maps DP-rank-order outputs back to + original ``meta.keys`` order (feed to + ``BatchedDataDict.reorder_data`` post-aggregation); ``None`` if + no reorder occurred. + """ + n = len(meta.keys) + if n == 0: + raise ValueError("shard_meta_for_dp: empty meta — nothing to shard") + if meta.sequence_lengths is None or len(meta.sequence_lengths) != n: + raise ValueError( + "shard_meta_for_dp requires meta.sequence_lengths populated and " + f"of length {n} (got {meta.sequence_lengths!r}). The rollout " + "actor's fan-out should populate this from input_lengths." + ) + if sequence_packing_args is not None and dynamic_batching_args is not None: + raise ValueError( + "Pass at most one of sequence_packing_args / dynamic_batching_args." + ) + + seq_lens = list(meta.sequence_lengths) + # Skeleton BatchedDataDict — `shard_by_batch_size` only needs + # input_ids (placeholder), input_lengths (real), sample_mask (ones). + # ``meta_idx`` lets us recover which original meta index each shard row + # corresponds to, so we can slice ``meta.keys`` per rank. + skeleton = BatchedDataDict( + { + INPUT_IDS: torch.zeros(n, 1, dtype=torch.int64), + INPUT_LENGTHS: torch.tensor(seq_lens, dtype=torch.int64), + SAMPLE_MASK: torch.ones(n, dtype=torch.float32), + META_IDX: torch.arange(n, dtype=torch.int64), + } + ) + + if dynamic_batching_args is not None: + sharded, _ = skeleton.shard_by_batch_size( + dp_world, + batch_size=batch_size, + # pyrefly: ignore # bad-argument-type + dynamic_batching_args=dynamic_batching_args, + ) + elif sequence_packing_args is not None: + sharded, _ = skeleton.shard_by_batch_size( + dp_world, + batch_size=batch_size, + # pyrefly: ignore # bad-argument-type + sequence_packing_args=sequence_packing_args, + ) + else: + sharded = skeleton.shard_by_batch_size(dp_world, batch_size=batch_size) + + base_extra: dict[str, Any] = dict(meta.extra_info or {}) + out: list[KVBatchMeta] = [] + flat_idx: list[int] = [] + for shard in sharded: + # pyrefly: ignore # no-matching-overload + idx_list: list[int] = shard[META_IDX].tolist() + flat_idx.extend(idx_list) + rank_keys = [meta.keys[i] for i in idx_list] + rank_seqlens = [seq_lens[i] for i in idx_list] + rank_extra = dict(base_extra) + # Per-shard packing metadata — set by ``shard_by_batch_size`` when + # sequence_packing/dynamic_batching is enabled. Workers' *_presharded + # paths look these up off ``meta.extra_info``. + for attr in ( + MICRO_BATCH_INDICES, + MICRO_BATCH_LENGTHS, + ELEM_COUNTS_PER_GB, + ): + val = getattr(shard, attr, None) + if val is not None: + rank_extra[attr] = val + out.append( + KVBatchMeta( + partition_id=meta.partition_id, + task_name=meta.task_name, + keys=rank_keys, + fields=meta.fields, + sequence_lengths=rank_seqlens, + extra_info=rank_extra, + ) + ) + + # Build inverse permutation: unsorted[orig_idx] = position_in_aggregated. + # When workers' results are concatenated in DP-rank order, row `j` of + # the aggregate corresponds to original index `flat_idx[j]`. To restore + # original meta.keys order, the caller does aggregated.reorder_data( + # unsorted_indices) — same contract as `_shard_for_logprob`. 
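+    # Worked example: flat_idx = [2, 0, 1] means aggregated row 0 came from
+    # original row 2, row 1 from row 0, and row 2 from row 1, so
+    # unsorted = [1, 2, 0] (original row 0 now sits at aggregated position 1).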
+ unsorted: Optional[list[int]] = None + if flat_idx != list(range(n)): + unsorted = [0] * n + for new_pos, old_idx in enumerate(flat_idx): + unsorted[old_idx] = new_pos + return out, unsorted diff --git a/nemo_rl/data_plane/schema.py b/nemo_rl/data_plane/schema.py new file mode 100644 index 0000000000..64d8b7902e --- /dev/null +++ b/nemo_rl/data_plane/schema.py @@ -0,0 +1,59 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Shared constants and type aliases for the data-plane meta contract.""" + +from typing import Literal + +# Materialization layout for `codec.materialize` / `read_columns` / worker fetch. +Layout = Literal["padded", "jagged"] + +# Per-shard packing metadata keys in `KVBatchMeta.extra_info`. +MICRO_BATCH_INDICES = "micro_batch_indices" +MICRO_BATCH_LENGTHS = "micro_batch_lengths" +ELEM_COUNTS_PER_GB = "elem_counts_per_gb" + +# Skeleton field names from `shard_meta_for_dp`. +INPUT_IDS = "input_ids" +INPUT_LENGTHS = "input_lengths" +SAMPLE_MASK = "sample_mask" +META_IDX = "meta_idx" + +# Tensor fields in the train partition. Rollout writes the input +# subset on first put; later stages add prev_logprobs / +# reference_policy_logprobs (workers) and advantages (driver). +DP_TRAIN_FIELDS = ( + "input_ids", + "input_lengths", + "generation_logprobs", + "prev_logprobs", + "reference_policy_logprobs", + "advantages", + "token_mask", + "sample_mask", +) + +# Subset fetched by logprob / ref-logprob workers. +LP_SEED_FIELDS = ( + "input_ids", + "input_lengths", + "token_mask", + "sample_mask", +) + +# Train-partition fields NOT needed for KV-scale calibration. Derived +# from ``DP_TRAIN_FIELDS`` so a new train-side column added to the +# schema is excluded-by-default — to include a new column in +# calibration, add it to the private set below. +_DP_CALIB_INPUT_FIELDS = frozenset({INPUT_IDS, INPUT_LENGTHS}) +DP_CALIB_EXCLUDED_FIELDS = frozenset(DP_TRAIN_FIELDS) - _DP_CALIB_INPUT_FIELDS diff --git a/nemo_rl/data_plane/worker_mixin.py b/nemo_rl/data_plane/worker_mixin.py new file mode 100644 index 0000000000..f6e5bd8fc9 --- /dev/null +++ b/nemo_rl/data_plane/worker_mixin.py @@ -0,0 +1,487 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""TransferQueue awareness for policy workers, isolated from the base class. 
+ +Mix into a worker class to add per-rank TQ-mediated entrypoints +(:meth:`train_presharded`, :meth:`get_logprobs_presharded`, +:meth:`get_reference_policy_logprobs_presharded`) without touching +``BasePolicyWorker``. Subclasses that don't need TQ keep their bare +inheritance and stay zero-cost. + +Subclasses must implement :meth:`_get_replica_group` (returns the +NCCL group of TPƗCPƗPP siblings within this DP rank, or ``None`` for +TP=CP=PP=1) and inherit ``train`` / ``get_logprobs`` / +``get_reference_policy_logprobs`` from the worker base. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Literal, Optional + +import torch + +FetchPolicy = Literal["auto", "independent", "leader_broadcast"] + +from nemo_rl.data.llm_message_utils import attach_message_log_view +from nemo_rl.data_plane.schema import ( + ELEM_COUNTS_PER_GB, + MICRO_BATCH_INDICES, + MICRO_BATCH_LENGTHS, + Layout, +) +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.models.policy.interfaces import ReferenceLogprobOutputSpec +from nemo_rl.utils.nsys import wrap_with_nvtx_name + +if TYPE_CHECKING: + from nemo_rl.data_plane import DataPlaneConfig, KVBatchMeta + from nemo_rl.data_plane.interfaces import DataPlaneClient + + +def _broadcast_batched_data_dict( + data: Optional[BatchedDataDict[Any]], + *, + src: int, + group: Any, +) -> BatchedDataDict[Any]: + """Broadcast a BatchedDataDict from ``src`` to all ranks in ``group``. + + Two-phase to avoid pickling tensor payloads on the hot path: a small + descriptor (per-key dtype/shape) ships via ``broadcast_object_list`` + first, then each tensor's data ships via ``broadcast`` on its + current device. The leader supplies ``data``; non-leaders pass + ``None`` and get an empty BatchedDataDict filled in-place. + """ + is_leader = torch.distributed.get_rank() == src + # NCCL groups can only broadcast CUDA tensors; pick the broadcast + # device from the group backend so CPU TQ outputs are moved to GPU + # before NCCL broadcast. + backend = torch.distributed.get_backend(group) + bcast_device: Any = torch.cuda.current_device() if backend == "nccl" else "cpu" + + if is_leader: + assert data is not None, "leader must provide non-None data" + descriptor: list[Any] = [] + for k, v in data.items(): + if isinstance(v, torch.Tensor): + descriptor.append( + (k, "tensor", str(v.dtype), tuple(v.shape), str(v.device)) + ) + else: + descriptor.append((k, "raw", v)) + payload: list[Any] = [descriptor] + else: + payload = [None] + + torch.distributed.broadcast_object_list(payload, src=src, group=group) + descriptor = payload[0] + assert descriptor is not None + + # pyrefly: ignore # bad-assignment + out: BatchedDataDict[Any] = data if is_leader else BatchedDataDict() + for entry in descriptor: + key = entry[0] + kind = entry[1] + if kind == "tensor": + dtype_str, shape, src_device = entry[2], entry[3], entry[4] + if is_leader: + tensor = out[key] + if tensor.device.type != torch.device(bcast_device).type: + tensor = tensor.to(bcast_device) + out[key] = tensor + else: + dtype = getattr(torch, dtype_str.split(".")[-1]) + tensor = torch.empty(shape, dtype=dtype, device=bcast_device) + out[key] = tensor + torch.distributed.broadcast(tensor, src=src, group=group) + # Restore non-leader tensors to the leader's source device + # so downstream code sees the same layout pre-broadcast. 
+ if ( + not is_leader + and torch.device(src_device).type != torch.device(bcast_device).type + ): + out[key] = tensor.to(src_device) + else: + if not is_leader: + out[key] = entry[2] + return out + + +class TQWorkerMixin: + """Adds TransferQueue per-rank fetch/write-back to a policy worker. + + The driver-side ``TQPolicy`` fans out per-rank ``KVBatchMeta``; + each worker calls ``self._fetch(meta, ...)`` to pull its slice from + TQ and runs the existing per-rank method body. + """ + + _dp_client: Optional[DataPlaneClient] = None + + def setup_data_plane(self, cfg: DataPlaneConfig) -> None: + """Connect this worker process's client to the existing TQ controller. + + Called once by the driver after worker construction. Idempotent. + """ + if self._dp_client is not None: + return + from nemo_rl.data_plane import build_data_plane_client + + # bootstrap=False — the driver already created the named + # controller actor; this process attaches as a client. + self._dp_client = build_data_plane_client(cfg, bootstrap=False) + + def _require_dp_client(self) -> DataPlaneClient: + if self._dp_client is None: + raise RuntimeError( + "Data-plane client not initialised on worker. The driver " + "must call setup_data_plane(cfg) before invoking any " + "*_presharded entrypoint." + ) + return self._dp_client + + def _get_replica_group(self) -> Optional[Any]: + """NCCL group of TPƗCPƗPP siblings within this DP rank. + + ``None`` means "no siblings" (TP=CP=PP=1). Subclasses must + override using their parallelism state (DTensor ``device_mesh``, + Megatron ``parallel_state``). Returning ``None`` makes + :meth:`_fetch` use independent fetch; returning a group makes + it use leader-fetch + NCCL broadcast. + """ + return None + + def _pad_value_dict(self) -> dict[str, Any]: + """Per-field pad value used by :func:`materialize` to detile the jagged wire format. + + Token-id fields use the tokenizer pad id. + """ + pad_id = getattr(getattr(self, "tokenizer", None), "pad_token_id", None) + if pad_id is None: + return {} + return {"input_ids": pad_id, "prompt_ids_for_adv": pad_id} + + def _fetch( + self, + meta: "KVBatchMeta", + *, + layout: Layout = "padded", + fetch_policy: FetchPolicy = "auto", + preprocess: Optional[Any] = None, + ) -> BatchedDataDict[Any]: + """Fetch this rank's slice from TQ and return a BatchedDataDict. + + Args: + meta: Per-rank ``KVBatchMeta`` from :func:`shard_meta_for_dp`. + layout: Materialization layout (``"padded"`` or ``"jagged"``). + fetch_policy: ``"auto"`` uses leader-fetch + NCCL broadcast when + :meth:`_get_replica_group` returns a group, else independent + fetch (cheapest for TP=CP=PP=1). ``"independent"`` forces + every sibling to fetch. ``"leader_broadcast"`` forces the + broadcast path and asserts a replica group exists. + preprocess: Optional ``(worker, td) -> td`` applied between + materialize and return. + + Returns: + ``BatchedDataDict`` of this rank's slice. + """ + if fetch_policy not in {"auto", "independent", "leader_broadcast"}: + raise ValueError(f"unknown fetch_policy: {fetch_policy!r}") + + from nemo_rl.data_plane import materialize + + pad_value_dict = self._pad_value_dict() + replica_group = ( + self._get_replica_group() + if fetch_policy in {"auto", "leader_broadcast"} + else None + ) + if fetch_policy == "leader_broadcast" and replica_group is None: + raise RuntimeError( + "_fetch(fetch_policy='leader_broadcast') requires a " + "replica group, but _get_replica_group() returned None." 
+ ) + + pad_to_multiple = int((meta.extra_info or {}).get("pad_to_multiple", 1)) + + if replica_group is not None: + leader = torch.distributed.get_global_rank(replica_group, 0) + is_leader = torch.distributed.get_rank() == leader + if is_leader: + td = self._require_dp_client().kv_batch_get( + keys=meta.keys, + partition_id=meta.partition_id, + select_fields=list(meta.fields), # type: ignore[no-matching-overload] + ) + data = materialize( + td, + layout=layout, + pad_value_dict=pad_value_dict, + pad_to_multiple=pad_to_multiple, + ) + else: + data = None + data = _broadcast_batched_data_dict( + data, + src=leader, + group=replica_group, + ) + # Reconstruct message_log after broadcast so the views alias + # the per-rank local ``input_ids`` rather than the leader's. + attach_message_log_view(data) + if preprocess is not None: + data = preprocess(self, data) + return data + + td = self._require_dp_client().kv_batch_get( + keys=meta.keys, + partition_id=meta.partition_id, + select_fields=list(meta.fields), # type: ignore[no-matching-overload] + ) + data = materialize( + td, + layout=layout, + pad_value_dict=pad_value_dict, + pad_to_multiple=pad_to_multiple, + ) + attach_message_log_view(data) + if preprocess is not None: + data = preprocess(self, data) + return data + + def _apply_packing_prep(self, data: BatchedDataDict[Any]) -> BatchedDataDict[Any]: + """Re-derive ``micro_batch_indices`` / ``micro_batch_lengths`` on the local slice. + + Uses ``shard_by_batch_size(shards=1, ...)``. The legacy DP path computes those + as a side effect of the DP-shard call; the TQ presharded path receives a + per-rank slice without them set, so we recompute here using ``self.cfg``. + """ + cfg = getattr(self, "cfg", None) + if not isinstance(cfg, dict): + return data + seqpack = cfg.get("sequence_packing", {}) or {} + dynbatch = cfg.get("dynamic_batching", {}) or {} + + if seqpack.get("enabled", False): + spa = { + "algorithm": seqpack["algorithm"], + "input_key": "input_ids", + "input_lengths_key": "input_lengths", + "sequence_length_pad_multiple": cfg[ + "make_sequence_length_divisible_by" + ], + "max_tokens_per_microbatch": seqpack["train_mb_tokens"], + } + packed, _ = data.shard_by_batch_size( + shards=1, + batch_size=None, + # pyrefly: ignore # bad-argument-type + sequence_packing_args=spa, + ) + return packed[0] + + if dynbatch.get("enabled", False): + dba = { + "input_key": "input_ids", + "input_lengths_key": "input_lengths", + "sequence_length_round": dynbatch["sequence_length_round"], + "max_tokens_per_microbatch": dynbatch["train_mb_tokens"], + } + sharded, _ = data.shard_by_batch_size( + shards=1, + batch_size=None, + # pyrefly: ignore # bad-argument-type + dynamic_batching_args=dba, + ) + return sharded[0] + + return data + + def _attach_or_repack_pack_metadata( + self, + data: BatchedDataDict[Any], + meta: "KVBatchMeta", + ) -> BatchedDataDict[Any]: + """Trust driver-supplied packing metadata or re-derive locally. + + When the driver pre-balanced packing across DP ranks it ships + ``micro_batch_indices`` / ``micro_batch_lengths`` (and optionally + ``elem_counts_per_gb``) in ``meta.extra_info``. Locally + re-packing produces variable bin counts across DP groups and + desyncs Megatron's per-microbatch collectives — trust the driver + when it provided the metadata. 
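+
+        Key names the driver-supplied path checks (constants imported from
+        the schema module at the top of this file; values are placeholders,
+        not real shapes)::
+
+            meta.extra_info = {
+                MICRO_BATCH_INDICES: ...,   # per-microbatch row indices
+                MICRO_BATCH_LENGTHS: ...,   # per-microbatch lengths
+                ELEM_COUNTS_PER_GB: ...,    # optional
+            }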
+ """ + extra = meta.extra_info or {} + if MICRO_BATCH_INDICES in extra and MICRO_BATCH_LENGTHS in extra: + data.micro_batch_indices = extra[MICRO_BATCH_INDICES] + data.micro_batch_lengths = extra[MICRO_BATCH_LENGTHS] + if ELEM_COUNTS_PER_GB in extra: + data.elem_counts_per_gb = extra[ELEM_COUNTS_PER_GB] + return data + return self._apply_packing_prep(data) + + def _is_replica_leader(self) -> bool: + """True iff this rank should perform per-DP-rank-unique side-effects. + + Examples include TQ write-back. Always True for non-replicated configs. + """ + replica_group = self._get_replica_group() + if replica_group is None: + return True + leader = torch.distributed.get_global_rank(replica_group, 0) + return torch.distributed.get_rank() == leader + + def _is_writeback_leader(self) -> bool: + """True iff this rank is the TPƗCPƗPP leader for write-back to TQ. + + Distinct from :meth:`_is_replica_leader` because that one piggybacks + on :meth:`_get_replica_group`, which subclasses gate on ``CP > 1`` + (a fetch-path optimization). Under TP-only configs (e.g. TP=2, + CP=1) the replica group is ``None`` → every rank passes the + leader check → every TP rank writes the same keys, which crashes + the mooncake_cpu backend with ``-601 ILLEGAL_CLIENT`` (concurrent + UpsertStart from different Mooncake clients on the same key). + Subclasses with TP/CP/PP siblings must override to gate on the + true (TP, CP, PP) coordinates regardless of CP. + """ + return self._is_replica_leader() + + def _write_back( + self, + meta: "KVBatchMeta", + fields: dict[str, torch.Tensor], + ) -> None: + """Leader-only ``kv_batch_put(meta.keys, fields=...)``. + + Per-token fields are jagged-packed via :func:`maybe_pack_jagged` + so they land with the same row lengths as the initial put; + without this a worker write-back (rectangular ``[N, S]``) would + mismatch the jagged ``input_ids`` on the next read. + + Args: + meta: Per-rank ``KVBatchMeta`` for this slice. + fields: Map of field name to tensor to write back. + """ + if not self._is_writeback_leader() or not fields: + return + from nemo_rl.data_plane.column_io import write_columns + + write_columns(self._require_dp_client(), meta, fields) + + def _write_back_result_field( + self, + meta: "KVBatchMeta", + result: Any, + *, + result_key: str, + tq_field: str, + ) -> None: + """Single chokepoint for ``*_presharded`` write-backs. + + ``result`` is checked via the ``Mapping`` ABC because + ``BatchedDataDict`` is a ``UserDict`` (not ``dict``). + + Args: + meta: Per-rank ``KVBatchMeta`` for this slice. + result: Worker output containing ``result_key``. + result_key: Key into ``result`` for the tensor to write back. + tq_field: Field name on the TQ side. + """ + if self._dp_client is None: + return + from collections.abc import Mapping + + if not isinstance(result, Mapping) or result_key not in result: + raise RuntimeError( + f"_write_back_result_field: result type {type(result).__name__} " + f"missing key {result_key!r}; cannot write back." + ) + val = result[result_key] + if not isinstance(val, torch.Tensor): + raise TypeError( + f"_write_back_result_field: result[{result_key!r}] is " + f"{type(val).__name__}, expected torch.Tensor." + ) + if val.shape[0] != len(meta.keys): + raise ValueError( + f"_write_back_result_field: shape mismatch — " + f"result[{result_key!r}] has batch dim {val.shape[0]} " + f"but meta.keys has {len(meta.keys)}." 
+ ) + self._write_back(meta, {tq_field: val.detach().to("cpu")}) + + @wrap_with_nvtx_name("policy_worker/train_presharded") + def train_presharded( + self, + meta: "KVBatchMeta", + loss_fn: Any, + eval_mode: bool = False, + gbs: Optional[int] = None, + mbs: Optional[int] = None, + ) -> dict[str, Any]: + """Per-rank training entrypoint. Fetch → packing prep → delegate.""" + data = self._fetch(meta) + data = self._attach_or_repack_pack_metadata(data, meta) + return self.train( # type: ignore[attr-defined] + data, + loss_fn=loss_fn, + eval_mode=eval_mode, + gbs=gbs, + mbs=mbs, + ) + + @wrap_with_nvtx_name("policy_worker/get_logprobs_presharded") + def get_logprobs_presharded( + self, + meta: "KVBatchMeta", + micro_batch_size: Optional[int] = None, + ) -> BatchedDataDict[Any]: + """Per-rank logprob entrypoint. Fetch → packing prep → run → write back.""" + data = self._fetch(meta) + data = self._attach_or_repack_pack_metadata(data, meta) + result: BatchedDataDict[Any] = self.get_logprobs( # type: ignore[attr-defined] + data=data, + micro_batch_size=micro_batch_size, + ) + # Canonical TQ column name is "prev_logprobs" (matches what + # ``train_presharded`` fetches for the loss). + self._write_back_result_field( + meta, + result, + result_key="logprobs", + tq_field="prev_logprobs", + ) + return result + + @wrap_with_nvtx_name("policy_worker/get_reference_policy_logprobs_presharded") + def get_reference_policy_logprobs_presharded( + self, + meta: "KVBatchMeta", + micro_batch_size: Optional[int] = None, + ) -> BatchedDataDict[ReferenceLogprobOutputSpec]: + """Per-rank reference-policy logprob entrypoint.""" + data = self._fetch(meta) + data = self._attach_or_repack_pack_metadata(data, meta) + result: BatchedDataDict[ReferenceLogprobOutputSpec] = ( + self.get_reference_policy_logprobs( # type: ignore[attr-defined] + data=data, + micro_batch_size=micro_batch_size, + ) + ) + self._write_back_result_field( + meta, + result, + result_key="reference_logprobs", + tq_field="reference_policy_logprobs", + ) + return result diff --git a/nemo_rl/distributed/ray_actor_environment_registry.py b/nemo_rl/distributed/ray_actor_environment_registry.py index 30b0ae80bd..41f85567a3 100644 --- a/nemo_rl/distributed/ray_actor_environment_registry.py +++ b/nemo_rl/distributed/ray_actor_environment_registry.py @@ -45,6 +45,8 @@ "nemo_rl.algorithms.async_utils.AsyncTrajectoryCollector": PY_EXECUTABLES.VLLM, # ReplayBuffer needs vLLM environment to handle trajectory data from VllmGenerationWorker "nemo_rl.algorithms.async_utils.ReplayBuffer": PY_EXECUTABLES.VLLM, + # SyncRolloutActor drives vLLM rollouts and writes flattened tensors (tensordict) to TQ + "nemo_rl.experience.sync_rollout_actor.SyncRolloutActor": PY_EXECUTABLES.VLLM, "nemo_rl.environments.tools.retriever.RAGEnvironment": PY_EXECUTABLES.SYSTEM, "nemo_rl.environments.nemo_gym.NemoGym": PY_EXECUTABLES.NEMO_GYM, } diff --git a/nemo_rl/experience/rollouts.py b/nemo_rl/experience/rollouts.py index ab417e0491..cde522eab3 100644 --- a/nemo_rl/experience/rollouts.py +++ b/nemo_rl/experience/rollouts.py @@ -96,7 +96,10 @@ def generate_responses( generated_texts = tokenizer.batch_decode(generated_ids, skip_special_tokens=True) - # Append to message log + # Per-row slices alias the vllm output arena; safe in the data-plane + # path because `sync_rollout_actor.rollout_to_tq` calls + # `decompose_message_log` before the wire, so no tensor reaches + # per-row pickle. 
for i, (text, input_length, total_length) in enumerate( zip(generated_texts, input_lengths, unpadded_sequence_lengths) ): @@ -198,7 +201,7 @@ async def generate_responses_async( generated_texts = tokenizer.batch_decode(generated_ids, skip_special_tokens=True) - # Append to message log + # Slice aliasing safe; see sync version above. for i, (text, input_length, total_length) in enumerate( zip(generated_texts, input_lengths, unpadded_sequence_lengths) ): diff --git a/nemo_rl/experience/sync_rollout_actor.py b/nemo_rl/experience/sync_rollout_actor.py new file mode 100644 index 0000000000..ea953d93c6 --- /dev/null +++ b/nemo_rl/experience/sync_rollout_actor.py @@ -0,0 +1,315 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Sync GRPO rollout actor — sibling of ``async_utils``. + +Houses :class:`SyncRolloutActor`, the Ray actor that owns the multi-turn +rollout loop AND the post-rollout flatten / mask / prompt extraction / +reward shaping / baseline-std for a sync GRPO step. The driver dispatches +a per-step prompt batch + uids; the actor runs ``run_multi_turn_rollout`` +(or async / nemo_gym variants), then writes the bulk schema to TQ via +:func:`nemo_rl.data_plane.column_io.kv_first_write`. Only a ``KVBatchMeta`` +and a small per-sample slice (rewards, masks, lengths, baseline/std, +prompt_ids_for_adv) cross back to the driver via Ray. + +**Goal — rollout 1-hop put**: bulk tensors (input_ids, output_ids, +attention_mask, position_ids, multi_modal_inputs, generation_logprobs, +token_mask) stay actor-side until ``kv_batch_put``, then live only in +TQ. Driver never holds these bytes between rollout finish and train +fan-out. + +The actor is the sync counterpart to +:class:`nemo_rl.algorithms.async_utils.AsyncTrajectoryCollector`. It +intentionally does not buffer or stream — sync GRPO consumes the whole +step batch in one call. +""" + +from __future__ import annotations + +from typing import Any, Optional + +import numpy as np +import ray +import torch + +from nemo_rl.data_plane.column_io import kv_first_write +from nemo_rl.data_plane.interfaces import KVBatchMeta +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.environments.interfaces import EnvironmentInterface +from nemo_rl.experience.rollouts import ( + run_async_multi_turn_rollout, + run_async_nemo_gym_rollout, + run_multi_turn_rollout, +) +from nemo_rl.models.generation.interfaces import GenerationInterface + + +@ray.remote # pragma: no cover +class SyncRolloutActor: + """Per-step rollout dispatcher. + + Runs: rollout + flatten + mask + prompt extraction + baseline/std + TQ put. + Returns ``(meta, slice, metrics)``. + + Lifecycle: one instance per ``grpo_train_sync`` invocation. The driver + instantiates with the same handles it would normally pass to + ``run_multi_turn_rollout`` plus the data-plane config so the actor + can attach as a TQ client (``bootstrap=False`` — controller is + bootstrapped on the driver via ``TQPolicy``). 
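+
+    Driver-side sketch (illustrative; the literal call site lives in
+    ``grpo_train_sync``, and ``make_actor_runtime_env`` is the venv helper
+    from ``nemo_rl.utils.venvs``)::
+
+        actor = SyncRolloutActor.options(
+            runtime_env=make_actor_runtime_env(
+                "nemo_rl.experience.sync_rollout_actor.SyncRolloutActor"
+            ),
+        ).remote(
+            policy_generation=policy_generation,
+            tokenizer=tokenizer,
+            task_to_env=task_to_env,
+            master_config=master_config,
+            dp_cfg=master_config["data_plane"],
+        )
+        meta, sample_slice, rollout_metrics, gen_metrics = ray.get(
+            actor.rollout_to_tq.remote(
+                input_batch, uids=uids, partition_id="train"
+            )
+        )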
+ """ + + def __init__( + self, + policy_generation: GenerationInterface, + tokenizer: Any, + task_to_env: dict[str, EnvironmentInterface], + master_config: Any, + dp_cfg: dict[str, Any], + ) -> None: + self.policy_generation = policy_generation + self.tokenizer = tokenizer + self.task_to_env = task_to_env + self.master_config = master_config + + from nemo_rl.data_plane import build_data_plane_client + + self._dp_client = build_data_plane_client(dp_cfg, bootstrap=False) + + def rollout_to_tq( + self, + input_batch: BatchedDataDict[Any], + *, + uids: list[str], + partition_id: str, + first_iter: bool = True, + ) -> tuple[ + KVBatchMeta, + dict[str, Any], + dict[str, Any], + Optional[dict[str, Any]], + ]: + """Run the full per-step generation cycle and write bulk data to TQ. + + Bundles six steps into one Ray round-trip so the driver only sees + a single RPC instead of separate calls for each: + + 1. **Reset metrics** — ``policy_generation.clear_logger_metrics()`` + clears per-step generation accumulators before the rollout. + 2. **Rollout** — runs ``run_multi_turn_rollout`` (or the async / + nemo-gym variants) to produce ``final_batch``. + 3. **Flatten + mask + prompt extraction** — converts + ``message_log`` layout to flat tensors; builds token mask, + sample mask, prompt-only ids, baseline/std. + 4. **Write bulk to TQ** — ``kv_first_write`` puts every tensor + field in one flat ``kv_batch_put``; the driver never touches + bulk bytes. + 5. **Release GPU** — ``policy_generation.finish_generation()`` + frees KV cache and inference state so the trainer can use the + GPU immediately. + 6. **Capture metrics** — ``policy_generation.get_logger_metrics()`` + collects generation stats (throughput, etc.) and returns them + to the driver in the result tuple. + + The driver receives ``(meta, slice, rollout_metrics, + generation_logger_metrics)`` and uses only the small per-sample + slice for its own compute (rewards, advantages, dynamic sampling). + + Args: + input_batch: Per-step prompt batch (already repeat-interleaved). + uids: One uid per prompt; bulk keys are ``f"{uid}_g{i}"``. + partition_id: TQ partition target. + first_iter: True on the first DS iteration of a step; drives + ``policy_generation.snapshot_step_metrics()`` so per-step + metrics align with the legacy ``grpo.grpo_train`` path. + + Returns: + ``(meta, slice, rollout_metrics, generation_logger_metrics)``. + """ + # Lazy imports — avoid pulling grpo into this module at load. + from nemo_rl.algorithms.grpo import ( + _extract_prompt_only_messages, + _should_use_async_rollouts, + _should_use_nemo_gym, + ) + from nemo_rl.algorithms.utils import get_gdpo_reward_component_keys + from nemo_rl.data.llm_message_utils import ( + MESSAGE_LOG_BULK_FIELDS, + add_loss_mask_to_message_log, + batched_message_log_to_flat_message, + decompose_message_log, + ) + + # Per-step generation-side metric hooks: snapshot once on the + # first DS iter so backends with per-step deltas have a stable + # anchor; clear accumulators before every rollout. Mirrors + # legacy ``grpo_train``. + if self.policy_generation is not None: + if first_iter and hasattr(self.policy_generation, "snapshot_step_metrics"): + self.policy_generation.snapshot_step_metrics() + self.policy_generation.clear_logger_metrics() + + cfg = self.master_config + common = dict( + policy_generation=self.policy_generation, + input_batch=input_batch, + tokenizer=self.tokenizer, + task_to_env=self.task_to_env, + greedy=False, + ) + + # Rollout dispatch (mirrors grpo_sync.py:294-349). 
+ if _should_use_nemo_gym(cfg): + r = run_async_nemo_gym_rollout( + **common, + max_seq_len=None, + max_rollout_turns=None, + generation_config=cfg["policy"]["generation"], + ) + final_batch, rollout_metrics = r.final_batch, r.rollout_metrics + else: + runner = ( + run_async_multi_turn_rollout + if _should_use_async_rollouts(cfg) + else run_multi_turn_rollout + ) + final_batch, rollout_metrics = runner( + **common, + max_seq_len=cfg["policy"]["max_total_sequence_length"], + max_rollout_turns=cfg["grpo"]["max_rollout_turns"], + ) + fb = final_batch.to("cpu") + del final_batch + + # Assistant-only loss mask (shared helper); seed missing + # generation_logprobs (e.g. when the env wraps assistant turns + # without a backing logprob, or for greedy/replay rollouts). + add_loss_mask_to_message_log(fb["message_log"]) + for ml in fb["message_log"]: + for msg in ml: + msg.setdefault( + "generation_logprobs", + torch.zeros_like(msg["token_ids"], dtype=torch.float32), + ) + + # Flatten message_log → bulk tensors + extract prompt-only ids. + pad = {"pad_value_dict": {"token_ids": self.tokenizer.pad_token_id}} + flat, input_lengths = batched_message_log_to_flat_message( + fb["message_log"], + **pad, + make_sequence_length_divisible_by=cfg["policy"][ + "make_sequence_length_divisible_by" + ], + ) + prompt_flat, _ = batched_message_log_to_flat_message( + _extract_prompt_only_messages(fb["message_log"]), + **pad, + ) + + # TQ bulk payload — DP_TRAIN_FIELDS + multimodal extras. + bulk_batch = BatchedDataDict[Any]( + { + "input_ids": flat["token_ids"], + "input_lengths": input_lengths, + "generation_logprobs": flat["generation_logprobs"], + "token_mask": flat["token_loss_mask"], + "sample_mask": fb["loss_multiplier"], + } + ) + for k, v in flat.get_multimodal_dict(as_tensors=False).items(): + if isinstance(v, torch.Tensor): + bulk_batch[k] = v + # ``content`` (raw assistant text per sample) — rides TQ as a + # NonTensorStack so the driver can fetch it back at jsonl time + # (kv_first_write wraps it via NonTensorStack). + if "content" in flat: + bulk_batch["content"] = np.asarray(flat["content"], dtype=object) + + # Split `message_log` into per-field arrays instead of pickling + # the list-of-dicts-with-tensors per row. Consumer rebuilds + # `message_log` on read; external API stays the same. + decomposed = decompose_message_log(fb["message_log"]) + for k in MESSAGE_LOG_BULK_FIELDS: + bulk_batch[k] = decomposed[k] + + # Pass through remaining non-tensor fb fields as object arrays; + # `message_log` is excluded since its tensors live in the + # decomposed fields above (per-row pickle of dict-with-tensors + # would smuggle aliased views into the wire). + for k, v in fb.items(): + if isinstance(v, torch.Tensor) or k in bulk_batch or k == "message_log": + continue + bulk_batch[k] = ( + v + if isinstance(v, np.ndarray) and v.dtype == object + else np.asarray(v, dtype=object) + ) + + # Slice — only what the driver can't derive from a TQ slice fetch + # (anything containing `message_log` or per-token data would + # force a fetch). Driver does scale_rewards / reward_shaping / + # overlong filtering / baseline-std on this slice. 
+ truncated = fb["truncated"] + if not isinstance(truncated, torch.Tensor): + truncated = torch.tensor(truncated, dtype=torch.bool) + length = fb.get("length", input_lengths) + if not isinstance(length, torch.Tensor): + length = torch.tensor(length) + slice_extras = { + "total_reward": fb["total_reward"], + "loss_multiplier": fb["loss_multiplier"], + "truncated": truncated, + "length": length, + "input_lengths": input_lengths, + "prompt_ids_for_adv": prompt_flat["token_ids"], + # Computed by decompose_message_log above; feeds + # apply_reward_shaping on the driver without a TQ fetch. + "response_token_lengths": decomposed["response_token_lengths"], + } + # GDPO multi-reward components: scale_rewards iterates these + # keys driver-side and the GDPO advantage estimator reads them + # from rb_for_adv. Plumb them through the slice rather than + # forcing a separate TQ fetch. + for k in get_gdpo_reward_component_keys(fb): + slice_extras[k] = fb[k] + + n_samples = int(bulk_batch["sample_mask"].shape[0]) + if len(uids) == 0 or n_samples % len(uids) != 0: + raise ValueError( + f"bulk_batch has {n_samples} samples; not divisible by len(uids)={len(uids)}" + ) + n_gen = n_samples // len(uids) + keys = [f"{uid}_g{i}" for uid in uids for i in range(n_gen)] + meta = kv_first_write( + bulk_batch, + keys=keys, + dp_client=self._dp_client, + partition_id=partition_id, + extra_info={"rollout_metrics": rollout_metrics}, + task_name=partition_id, + pad_to_multiple=int( + cfg["policy"].get("make_sequence_length_divisible_by") or 1 + ), + ) + + if self.policy_generation is not None: + self.policy_generation.finish_generation() + gen_metrics = self.policy_generation.get_logger_metrics() + else: + gen_metrics = None + return meta, slice_extras, rollout_metrics, gen_metrics + + def shutdown(self) -> None: + try: + self._dp_client.close() + except Exception: + pass diff --git a/nemo_rl/models/policy/lm_policy.py b/nemo_rl/models/policy/lm_policy.py index c3f7772c42..a67442915f 100644 --- a/nemo_rl/models/policy/lm_policy.py +++ b/nemo_rl/models/policy/lm_policy.py @@ -367,6 +367,91 @@ def init_collective( # this function should co-work with vllm, so we should wait for all futures to complete outside return futures + # ── DP-shard helpers ──────────────────────────────────────────────── + # Shared between this Policy class (in-memory dispatch) and the + # planned ``TQPolicy(Policy)`` subclass (TQ-mediated dispatch). Each + # sharder mutates ``self.dynamic_batching_args`` / + # ``self.sequence_packing_args`` to set the appropriate + # ``max_tokens_per_microbatch`` (logprob_mb_tokens vs train_mb_tokens), + # exactly as the legacy bodies do today. + def _shard_for_logprob( + self, + data: BatchedDataDict[Any], + ) -> tuple[list["SlicedDataDict"], Optional[list[int]]]: + """Shard inputs for ``get_logprobs`` / ``get_reference_policy_logprobs``. + + Mirrors the legacy shard block (lines 426-450 / 503-530). Returns + ``(sharded_data, unsorted_data_indices)`` where the second element + is the inverse permutation needed to undo seqpack/dynbatch reorder + (``None`` when neither is enabled). 
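+
+        Caller pattern (mirrors the ``get_logprobs`` body below)::
+
+            sharded_data, unsorted_data_indices = self._shard_for_logprob(data)
+            ...  # fan out per-DP shards to workers, gather logprobs
+            if unsorted_data_indices is not None:
+                logprobs.reorder_data(unsorted_data_indices)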
+ """ + dp_size = self.sharding_annotations.get_axis_size("data_parallel") + if self.use_dynamic_batches: + self.dynamic_batching_args["max_tokens_per_microbatch"] = self.cfg[ + "dynamic_batching" + ]["logprob_mb_tokens"] + sharded_data, unsorted_data_indices = data.shard_by_batch_size( # type: ignore + dp_size, + batch_size=None, + dynamic_batching_args=self.dynamic_batching_args, + ) + elif self.use_sequence_packing: + self.sequence_packing_args["max_tokens_per_microbatch"] = self.cfg[ + "sequence_packing" + ]["logprob_mb_tokens"] + # we just shard into DP shards here as Sequence packing allows for CP. + sharded_data, unsorted_data_indices = data.shard_by_batch_size( + dp_size, + batch_size=None, + sequence_packing_args=self.sequence_packing_args, + ) + else: + sharded_data = data.shard_by_batch_size( # type: ignore + dp_size, + batch_size=None, + ) + unsorted_data_indices = None + return sharded_data, unsorted_data_indices + + def _shard_for_train( + self, + data: BatchedDataDict[Any], + batch_size: int, + ) -> list["SlicedDataDict"]: + """Shard inputs for ``train``. + + Mirrors the legacy shard block (lines 706-729). Note vs. + ``_shard_for_logprob``: uses ``train_mb_tokens`` (not + ``logprob_mb_tokens``), passes ``batch_size`` (not None), and + does not return ``unsorted_data_indices`` because train returns + scalar metrics (no per-row outputs to reorder). + """ + dp_size = self.sharding_annotations.get_axis_size("data_parallel") + if self.use_dynamic_batches: + self.dynamic_batching_args["max_tokens_per_microbatch"] = self.cfg[ + "dynamic_batching" + ]["train_mb_tokens"] + sharded_data, _ = data.shard_by_batch_size( + dp_size, + batch_size=batch_size, + dynamic_batching_args=self.dynamic_batching_args, + ) + elif self.use_sequence_packing: + self.sequence_packing_args["max_tokens_per_microbatch"] = self.cfg[ + "sequence_packing" + ]["train_mb_tokens"] + sharded_data, _ = data.shard_by_batch_size( + dp_size, + batch_size=batch_size, + sequence_packing_args=self.sequence_packing_args, + ) + else: + sharded_data = data.shard_by_batch_size( + dp_size, + batch_size=batch_size, + ) + return sharded_data + def get_logprobs( self, data: BatchedDataDict[GenerationDatumSpec], @@ -379,35 +464,8 @@ def get_logprobs( We use the convention that the logprob of the first token is 0 so that the sequence length is maintained. The logprob of input token i is specified at position i in the output logprobs tensor. """ - dp_size = self.sharding_annotations.get_axis_size("data_parallel") - sharded_data: list[SlicedDataDict] - unsorted_data_indices: list[int] - with timer.time("get_logprobs/shard_data") if timer else nullcontext(): - if self.use_dynamic_batches: - self.dynamic_batching_args["max_tokens_per_microbatch"] = self.cfg[ - "dynamic_batching" - ]["logprob_mb_tokens"] - sharded_data, unsorted_data_indices = data.shard_by_batch_size( # type: ignore - dp_size, - batch_size=None, - dynamic_batching_args=self.dynamic_batching_args, - ) - elif self.use_sequence_packing: - self.sequence_packing_args["max_tokens_per_microbatch"] = self.cfg[ - "sequence_packing" - ]["logprob_mb_tokens"] - # we just shard into DP shards here as Sequence packing allows for CP. 
- sharded_data, unsorted_data_indices = data.shard_by_batch_size( - dp_size, - batch_size=None, - sequence_packing_args=self.sequence_packing_args, - ) - else: - sharded_data = data.shard_by_batch_size( # type: ignore - dp_size, - batch_size=None, - ) + sharded_data, unsorted_data_indices = self._shard_for_logprob(data) with ( timer.time("get_logprobs/submit_logprob_futures") @@ -435,7 +493,7 @@ def get_logprobs( # dynamic batching sorts the inputs by sequence length to improve load balancing, # so change it back here - if self.use_dynamic_batches or self.use_sequence_packing: + if unsorted_data_indices is not None: logprobs.reorder_data(unsorted_data_indices) return logprobs @@ -450,37 +508,12 @@ def get_reference_policy_logprobs( Returns: Identical to get_logprobs. """ - dp_size = self.sharding_annotations.get_axis_size("data_parallel") - sharded_data: list[SlicedDataDict] - unsorted_data_indices: list[int] with ( timer.time("get_reference_policy_logprobs/shard_data") if timer else nullcontext() ): - if self.use_dynamic_batches: - self.dynamic_batching_args["max_tokens_per_microbatch"] = self.cfg[ - "dynamic_batching" - ]["logprob_mb_tokens"] - sharded_data, unsorted_data_indices = data.shard_by_batch_size( # type: ignore - dp_size, - batch_size=None, - dynamic_batching_args=self.dynamic_batching_args, - ) - elif self.use_sequence_packing: - self.sequence_packing_args["max_tokens_per_microbatch"] = self.cfg[ - "sequence_packing" - ]["logprob_mb_tokens"] - sharded_data, unsorted_data_indices = data.shard_by_batch_size( - dp_size, - batch_size=None, - sequence_packing_args=self.sequence_packing_args, - ) - else: - sharded_data = data.shard_by_batch_size( # type: ignore - dp_size, - batch_size=None, - ) + sharded_data, unsorted_data_indices = self._shard_for_logprob(data) with ( timer.time( @@ -513,7 +546,7 @@ def get_reference_policy_logprobs( # dynamic batching sorts the inputs by sequence length to improve load balancing, # so change it back here - if self.use_dynamic_batches or self.use_sequence_packing: + if unsorted_data_indices is not None: logprobs.reorder_data(unsorted_data_indices) return logprobs @@ -526,34 +559,8 @@ def get_topk_logits( timer: Optional[Timer] = None, ) -> BatchedDataDict[TopkLogitsOutputSpec]: """Dispatch get_topk_logits to workers (no CP/packed support initially).""" - dp_size = self.sharding_annotations.get_axis_size("data_parallel") - sharded_data: list[SlicedDataDict] - unsorted_data_indices: list[int] with timer.time("get_topk_logits/shard_data") if timer else nullcontext(): - if self.use_dynamic_batches: - self.dynamic_batching_args["max_tokens_per_microbatch"] = self.cfg[ - "dynamic_batching" - ]["logprob_mb_tokens"] - sharded_data, unsorted_data_indices = data.shard_by_batch_size( # type: ignore - dp_size, - batch_size=None, - dynamic_batching_args=self.dynamic_batching_args, - ) - elif self.use_sequence_packing: - self.sequence_packing_args["max_tokens_per_microbatch"] = self.cfg[ - "sequence_packing" - ]["logprob_mb_tokens"] - # we just shard into DP shards here as Sequence packing allows for CP. 
- sharded_data, unsorted_data_indices = data.shard_by_batch_size( - dp_size, - batch_size=None, - sequence_packing_args=self.sequence_packing_args, - ) - else: - sharded_data = data.shard_by_batch_size( # type: ignore - dp_size, - batch_size=None, - ) + sharded_data, unsorted_data_indices = self._shard_for_logprob(data) with ( timer.time("get_topk_logits/submit_topk_logits_futures") @@ -586,7 +593,7 @@ def get_topk_logits( stacked["topk_logits"] = torch.cat(all_topk_logits, dim=0) stacked["topk_indices"] = torch.cat(all_topk_indices, dim=0) - if self.use_dynamic_batches or self.use_sequence_packing: + if unsorted_data_indices is not None: stacked.reorder_data(unsorted_data_indices) return stacked @@ -604,31 +611,8 @@ def train( batch_size = gbs or self.cfg["train_global_batch_size"] micro_batch_size = mbs or self.cfg["train_micro_batch_size"] # Shard and replicate the batch - dp_size = self.sharding_annotations.get_axis_size("data_parallel") with timer.time("policy_training/sharding_data") if timer else nullcontext(): - if self.use_dynamic_batches: - self.dynamic_batching_args["max_tokens_per_microbatch"] = self.cfg[ - "dynamic_batching" - ]["train_mb_tokens"] - sharded_data, _ = data.shard_by_batch_size( - dp_size, - batch_size=batch_size, - dynamic_batching_args=self.dynamic_batching_args, - ) - elif self.use_sequence_packing: - self.sequence_packing_args["max_tokens_per_microbatch"] = self.cfg[ - "sequence_packing" - ]["train_mb_tokens"] - sharded_data, _ = data.shard_by_batch_size( - dp_size, - batch_size=batch_size, - sequence_packing_args=self.sequence_packing_args, - ) - else: - sharded_data = data.shard_by_batch_size( - dp_size, - batch_size=batch_size, - ) + sharded_data = self._shard_for_train(data, batch_size) if self.flops_tracker is not None: self.flops_tracker.reset() diff --git a/nemo_rl/models/policy/tq_policy.py b/nemo_rl/models/policy/tq_policy.py new file mode 100644 index 0000000000..b9adebd92e --- /dev/null +++ b/nemo_rl/models/policy/tq_policy.py @@ -0,0 +1,376 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""TQ-mediated Policy: meta-driven 1-hop counterpart to ``Policy``. + +Exposes ``train_from_meta`` / ``get_logprobs_from_meta`` / +``get_reference_policy_logprobs_from_meta`` — same return shapes as +``Policy.{train, get_logprobs, get_reference_policy_logprobs}`` but +accepting a ``KVBatchMeta`` instead of a ``BatchedDataDict``. The meta +names per-sample TQ keys minted once at rollout +(:class:`nemo_rl.experience.sync_rollout_actor.SyncRolloutActor`); each +dispatch slices the key list per DP rank via +:func:`nemo_rl.data_plane.preshard.shard_meta_for_dp` (no re-fan-out, +no key minting). Workers fetch their slice from TQ via +``self._fetch(meta)`` and write deltas back via +``self._write_back_result_field(...)``. See +``nemo_rl/data_plane/README.md`` for the full design. 
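+
+Per-step driver sketch (ordering as described above; argument values are
+illustrative and the literal trainer body lives in
+``nemo_rl/algorithms/grpo_sync.py``)::
+
+    policy.prepare_step(num_samples=num_prompts * group_size, group_size=group_size)
+    meta, sample_slice, *_ = ray.get(
+        rollout_actor.rollout_to_tq.remote(batch, uids=uids, partition_id="train")
+    )
+    policy.get_logprobs_from_meta(meta)          # workers write prev_logprobs to TQ
+    policy.get_reference_policy_logprobs_from_meta(meta)
+    # driver derives advantages from ``sample_slice`` and writes them to TQ
+    metrics = policy.train_from_meta(meta, loss_fn=loss_fn)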
+""" + +from __future__ import annotations + +import warnings +from collections import defaultdict +from contextlib import nullcontext +from dataclasses import replace +from typing import Any, Optional + +import ray + +from nemo_rl.algorithms.loss.interfaces import LossFunction +from nemo_rl.data_plane import KVBatchMeta, build_data_plane_client +from nemo_rl.data_plane.preshard import shard_meta_for_dp +from nemo_rl.data_plane.schema import DP_TRAIN_FIELDS, LP_SEED_FIELDS +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.models.policy.interfaces import ( + LogprobOutputSpec, + ReferenceLogprobOutputSpec, +) +from nemo_rl.models.policy.lm_policy import Policy +from nemo_rl.utils.flops_tracker import get_theoretical_tflops +from nemo_rl.utils.timer import Timer + +# ────────────────────────────────────────────────────────────────────────── +# Per-stage aggregators that assemble per-rank worker results into the +# shape each Policy method returns. Used by the TQ-mediated overrides +# below; kept out of ``lm_policy.Policy`` since the legacy in-memory +# path doesn't fan out per-rank and never calls these. +# ────────────────────────────────────────────────────────────────────────── + + +def _aggregate_train_results(results: list[dict[str, Any]]) -> dict[str, Any]: + out: dict[str, Any] = { + "loss": results[0]["global_loss"], + "grad_norm": results[0]["grad_norm"], + } + if "moe_metrics" in results[0]: + out["moe_metrics"] = results[0]["moe_metrics"] + all_mb_metrics: dict[str, list[Any]] = defaultdict(list) + for r in results: + for k, v in r["all_mb_metrics"].items(): + all_mb_metrics[k].extend(v) + out["all_mb_metrics"] = dict(all_mb_metrics) + return out + + +def _aggregate_logprob_results( + results: list[BatchedDataDict[Any]], +) -> BatchedDataDict[Any]: + return BatchedDataDict.from_batches(results, pad_value_dict={"logprobs": 0.0}) + + +def _aggregate_reference_logprob_results( + results: list[BatchedDataDict[Any]], +) -> BatchedDataDict[Any]: + return BatchedDataDict.from_batches( + results, pad_value_dict={"reference_logprobs": 0.0} + ) + + +class TQPolicy(Policy): + """TQ-mediated counterpart to :class:`Policy`. + + Constructor accepts an additional ``dp_cfg`` (the + ``master_config["data_plane"]`` dict). Bootstraps the controller on + the driver and forwards ``setup_data_plane(dp_cfg)`` to every worker + so they can attach as clients (``bootstrap=False``). + + The partition lifecycle (``register_partition`` / ``kv_clear``) is + the trainer's responsibility — this class assumes the partition + named ``self.tq_partition_id`` (default ``"train"``) is open with a + schema covering ``DP_TRAIN_FIELDS`` (the bulk schema written by the + rollout actor at first put + driver-/worker-written deltas). + """ + + def __init__( + self, + *args: Any, + dp_cfg: dict[str, Any], + tq_partition_id: str = "train", + **kwargs: Any, + ) -> None: + super().__init__(*args, **kwargs) + # Validate the topology the data plane fan-out (`shard_meta_for_dp`) + # depends on. Failing here surfaces a clear error at policy + # construction; the same condition is re-checked inside + # `shard_meta_for_dp` as a defensive invariant. + dp_world = self.sharding_annotations.get_axis_size("data_parallel") + if dp_world <= 0: + raise ValueError( + f"TQPolicy requires data_parallel axis size > 0, got {dp_world}. " + f"Check cluster config (gpus_per_node * num_nodes) vs. " + f"TP/PP/CP/EP sizes." 
+ ) + self.dp_cfg = dp_cfg + self.dp_client = build_data_plane_client(dp_cfg, bootstrap=True) + self.tq_partition_id = tq_partition_id + + # Forward to workers (replaces ``Policy.setup_data_plane`` call + # site in the trainer — TQPolicy bundles bootstrap + worker + # attach into construction so the trainer just instantiates + # ``TQPolicy(...)`` and is done). + ray.get( + [ + getattr(w, "setup_data_plane").remote(cfg=dp_cfg) + for w in self.worker_group._workers + ] + ) + + # ── lifecycle ────────────────────────────────────────────────────── + + def shutdown(self) -> bool: # type: ignore[override] + """Close the TQ client before shutting down the worker group.""" + try: + self.dp_client.close() + except Exception as e: + warnings.warn(f"Error closing data-plane client: {e}") + return super().shutdown() + + def prepare_step( + self, + num_samples: int, + group_size: Optional[int] = None, + ) -> None: + """Register the per-step TQ partition. + + Sync trainers call this at the start of each step. The static + partition id ``"train"`` is cleared and reused across steps. The + schema is the union of all consumer fields — producers write + only the subset they have, consumers fetch via ``select_fields``. + + Args: + num_samples: Expected total samples this step. + group_size: GRPO group size for balanced sampling; ``None`` disables grouping. + """ + self.dp_client.register_partition( + partition_id=self.tq_partition_id, + fields=list(DP_TRAIN_FIELDS), + num_samples=num_samples, + consumer_tasks=["prev_lp", "ref_lp", "train"], + grpo_group_size=group_size, + ) + + # ── 1-hop entrypoints (KVBatchMeta in, no re-fan-out) ────────────────── + + def _packing_args( + self, + mb_tokens_key: str, + ) -> tuple[Optional[dict[str, Any]], Optional[dict[str, Any]]]: + """Resolve (sequence_packing_args, dynamic_batching_args) for a given stage. + + The stage is identified by ``mb_tokens_key`` (``"logprob_mb_tokens"`` or + ``"train_mb_tokens"``). + """ + if getattr(self, "use_dynamic_batches", False): + args = dict(self.dynamic_batching_args) + args["max_tokens_per_microbatch"] = self.cfg["dynamic_batching"][ + mb_tokens_key + ] + return None, args + if getattr(self, "use_sequence_packing", False): + args = dict(self.sequence_packing_args) + args["max_tokens_per_microbatch"] = self.cfg["sequence_packing"][ + mb_tokens_key + ] + return args, None + return None, None + + def _logprob_dispatch( + self, + meta: KVBatchMeta, + *, + task_name: str, + worker_method: str, + aggregate_fn: Any, + timer_prefix: str, + timer: Optional[Timer], + common_kwargs: dict[str, Any], + ) -> BatchedDataDict[Any]: + """Shared body of get_logprobs_from_meta / get_reference_policy_logprobs_from_meta. + + Logprob workers need only LP_SEED_FIELDS — narrow the meta's + field list so ``_fetch`` doesn't pull rollout-only payload (e.g. + multimodal). The same shape is used for both prev_lp and ref_lp. 
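+
+        Worker-side counterpart (sketch; the real bodies are the
+        ``*_presharded`` methods on ``TQWorkerMixin``)::
+
+            data = self._fetch(meta)        # one DP rank's TQ slice
+            data = self._attach_or_repack_pack_metadata(data, meta)
+            result = self.get_logprobs(data=data, micro_batch_size=micro_batch_size)
+            self._write_back_result_field(
+                meta, result, result_key="logprobs", tq_field="prev_logprobs"
+            )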
+ """ + spa, dba = self._packing_args("logprob_mb_tokens") + lp_meta = replace(meta, fields=list(LP_SEED_FIELDS), task_name=task_name) + with timer.time(f"{timer_prefix}/shard_meta") if timer else nullcontext(): + metas, unsorted_indices = shard_meta_for_dp( + lp_meta, + dp_world=self.sharding_annotations.get_axis_size("data_parallel"), + batch_size=None, + sequence_packing_args=spa, + dynamic_batching_args=dba, + ) + with timer.time(f"{timer_prefix}/submit_futures") if timer else nullcontext(): + futures = self.worker_group.run_all_workers_sharded_data( + worker_method, + meta=metas, + in_sharded_axes=["data_parallel"], + replicate_on_axes=[ + "context_parallel", + "tensor_parallel", + "pipeline_parallel", + ], + output_is_replicated=[ + "context_parallel", + "tensor_parallel", + "pipeline_parallel", + ], + common_kwargs=common_kwargs, + ) + result = aggregate_fn(self.worker_group.get_all_worker_results(futures)) + if unsorted_indices is not None: + result.reorder_data(unsorted_indices) + return result + + def get_logprobs_from_meta( + self, + meta: KVBatchMeta, + micro_batch_size: Optional[int] = None, + timer: Optional[Timer] = None, + ) -> BatchedDataDict[LogprobOutputSpec]: + return self._logprob_dispatch( + meta, + task_name="prev_lp", + worker_method="get_logprobs_presharded", + aggregate_fn=_aggregate_logprob_results, + timer_prefix="get_logprobs", + timer=timer, + common_kwargs={"micro_batch_size": micro_batch_size}, + ) + + def get_reference_policy_logprobs_from_meta( + self, + meta: KVBatchMeta, + micro_batch_size: Optional[int] = None, + timer: Optional[Timer] = None, + ) -> BatchedDataDict[ReferenceLogprobOutputSpec]: + return self._logprob_dispatch( + meta, + task_name="ref_lp", + worker_method="get_reference_policy_logprobs_presharded", + aggregate_fn=_aggregate_reference_logprob_results, + timer_prefix="get_reference_policy_logprobs", + timer=timer, + common_kwargs={"micro_batch_size": micro_batch_size}, + ) + + def train_from_meta( + self, + meta: KVBatchMeta, + loss_fn: LossFunction, + eval_mode: bool = False, + gbs: Optional[int] = None, + mbs: Optional[int] = None, + timer: Optional[Timer] = None, + ) -> dict[str, Any]: + """1-hop counterpart to :meth:`train`. + + ``meta`` names per-sample keys; columns written by the rollout + actor + worker logprob deltas + driver-side advantage delta have + all landed under the same keys at this point. Workers fetch the + union via ``train_presharded`` → ``self._fetch(meta)``. No + partition drain here — sync 1-hop's trainer calls ``kv_clear`` + once at end of step. + + Args: + meta: Full-step ``KVBatchMeta`` (consumed by all DP ranks). + gbs: Global batch size; defaults to ``cfg["train_global_batch_size"]``. + mbs: Micro batch size; defaults to ``cfg["train_micro_batch_size"]``. + timer: Optional timer for nested ``policy_training/*`` measurements. + + Returns: + Aggregated training-step output dict. + """ + batch_size = gbs or self.cfg["train_global_batch_size"] + micro_batch_size = mbs or self.cfg["train_micro_batch_size"] + + spa, dba = self._packing_args("train_mb_tokens") + # Train workers fetch the full DP_TRAIN_FIELDS schema (rollout + + # logprob deltas + advantages + sample_mask). Caller is responsible + # for ensuring those columns have been written to TQ before this + # call (workers + driver delta-writes). 
+ train_meta = replace( + meta, + fields=list(DP_TRAIN_FIELDS), + task_name="train", + ) + with timer.time("policy_training/shard_meta") if timer else nullcontext(): + dp_metas, _ = shard_meta_for_dp( + train_meta, + dp_world=self.sharding_annotations.get_axis_size("data_parallel"), + batch_size=batch_size, + sequence_packing_args=spa, + dynamic_batching_args=dba, + ) + + if self.flops_tracker is not None: + self.flops_tracker.reset() + for m in dp_metas: + self.flops_tracker.track_batch(list(m.sequence_lengths or [])) + + with ( + timer.time("policy_training/submit_training_futures") + if timer + else nullcontext() + ): + futures = self.worker_group.run_all_workers_sharded_data( + "train_presharded", + meta=dp_metas, + in_sharded_axes=["data_parallel"], + replicate_on_axes=[ + "context_parallel", + "tensor_parallel", + "pipeline_parallel", + ], + output_is_replicated=[ + "context_parallel", + "tensor_parallel", + "pipeline_parallel", + ], + common_kwargs={ + "loss_fn": loss_fn, + "eval_mode": eval_mode, + "gbs": batch_size, + "mbs": micro_batch_size, + }, + ) + results = self.worker_group.get_all_worker_results(futures) + aggregated_results = _aggregate_train_results(results) + + if self.flops_tracker is not None: + aggregated_results["total_flops"] = self.flops_tracker.total_flops + aggregated_results["num_ranks"] = self.worker_group.cluster.world_size() + gpus_per_worker = self.worker_group.cluster.world_size() / max( + len(results), 1 + ) + try: + aggregated_results["theoretical_tflops"] = gpus_per_worker * sum( + get_theoretical_tflops(r["gpu_name"], r["model_dtype"]) + for r in results + ) + except Exception as e: + warnings.warn(f"Error getting theoretical flops: {e}") + + return aggregated_results diff --git a/nemo_rl/models/policy/workers/dtensor_policy_worker.py b/nemo_rl/models/policy/workers/dtensor_policy_worker.py index 022335f7d0..ac43bf1193 100644 --- a/nemo_rl/models/policy/workers/dtensor_policy_worker.py +++ b/nemo_rl/models/policy/workers/dtensor_policy_worker.py @@ -162,9 +162,14 @@ def get_cpu_state_dict( return new_state_dict +from nemo_rl.data_plane.worker_mixin import TQWorkerMixin + + # Classes with @ray.remote can't be inherited from, so we split the implementation out. # This is useful when using worker extension classes. -class DTensorPolicyWorkerImpl(AbstractPolicyWorker, ColocatablePolicyInterface): +class DTensorPolicyWorkerImpl( + TQWorkerMixin, AbstractPolicyWorker, ColocatablePolicyInterface +): def __repr__(self) -> str: """Customizes the actor's prefix in the Ray logs. @@ -175,6 +180,18 @@ def __repr__(self) -> str: else: return f"{self.__class__.__qualname__}" + def _get_replica_group(self) -> Optional[Any]: + """Replica group = flattened (cp, tp) sub-mesh, gated on CP > 1. + + Returns ``None`` for CP=1 so ``_fetch`` keeps using the proven + independent path (matches the qwen3-mcore-seqpack TP=2 baseline). + Once CP > 1, broadcasting the full BatchedDataDict to (CP, TP) + siblings amortizes the TQ read across siblings that need it. 
+ """ + if getattr(self, "cp_size", 1) <= 1: + return None + return self.device_mesh[("cp", "tp")]._flatten().get_group() + def __init__( self, config: PolicyConfig, diff --git a/nemo_rl/models/policy/workers/dtensor_policy_worker_v2.py b/nemo_rl/models/policy/workers/dtensor_policy_worker_v2.py index 2fa8a8e604..8521344b0c 100644 --- a/nemo_rl/models/policy/workers/dtensor_policy_worker_v2.py +++ b/nemo_rl/models/policy/workers/dtensor_policy_worker_v2.py @@ -188,9 +188,14 @@ def get_train_context( yield +from nemo_rl.data_plane.worker_mixin import TQWorkerMixin + + # Classes with @ray.remote can't be inherited from, so we split the implementation out. # This is useful when using worker extension classes. -class DTensorPolicyWorkerV2Impl(AbstractPolicyWorker, ColocatablePolicyInterface): +class DTensorPolicyWorkerV2Impl( + TQWorkerMixin, AbstractPolicyWorker, ColocatablePolicyInterface +): def __repr__(self) -> str: """Customizes the actor's prefix in the Ray logs. @@ -201,6 +206,26 @@ def __repr__(self) -> str: else: return f"{self.__class__.__qualname__}" + def _get_replica_group(self) -> Optional[Any]: + """Replica group = flattened (cp, tp) sub-mesh — see V1 worker.""" + if getattr(self, "cp_size", 1) <= 1: + return None + return self.device_mesh[("cp", "tp")]._flatten().get_group() + + def _is_writeback_leader(self) -> bool: + """``(cp_local_rank, tp_local_rank) == (0, 0)``. + + See :meth:`TQWorkerMixin._is_writeback_leader` for the rationale. + """ + if not hasattr(self, "device_mesh") or self.device_mesh is None: + return True + try: + cp = self.device_mesh["cp"].get_local_rank() + tp = self.device_mesh["tp"].get_local_rank() + except Exception: + return True + return cp == 0 and tp == 0 + def __init__( self, config: PolicyConfig, diff --git a/nemo_rl/models/policy/workers/megatron_policy_worker.py b/nemo_rl/models/policy/workers/megatron_policy_worker.py index 53e581c989..e6dd80fed7 100644 --- a/nemo_rl/models/policy/workers/megatron_policy_worker.py +++ b/nemo_rl/models/policy/workers/megatron_policy_worker.py @@ -95,9 +95,14 @@ TokenizerType = TypeVar("TokenizerType", bound=PreTrainedTokenizerBase) +from nemo_rl.data_plane.worker_mixin import TQWorkerMixin + + # Classes with @ray.remote can't be inherited from, so we split the implementation out. # This is useful when using worker extension classes. -class MegatronPolicyWorkerImpl(AbstractPolicyWorker, ColocatablePolicyInterface): +class MegatronPolicyWorkerImpl( + TQWorkerMixin, AbstractPolicyWorker, ColocatablePolicyInterface +): def __repr__(self): """Customizes the actor's prefix in the Ray logs. @@ -108,6 +113,73 @@ def __repr__(self): else: return f"{self.__class__.__qualname__}" + def _is_writeback_leader(self) -> bool: + """``(tp_rank, cp_rank, pp_rank) == (0, 0, 0)``. + + See :meth:`TQWorkerMixin._is_writeback_leader` for the rationale. + """ + if not torch.distributed.is_initialized(): + return True + return ( + parallel_state.get_tensor_model_parallel_rank() == 0 + and parallel_state.get_context_parallel_rank() == 0 + and parallel_state.get_pipeline_model_parallel_rank() == 0 + ) + + def _get_replica_group(self) -> Optional[Any]: + """Replica group = TP Ɨ CP Ɨ PP siblings within this DP rank. + + Gated on CP > 1: returns ``None`` when CP=1 so ``_fetch`` keeps + using the proven independent path (matches the qwen3-mcore TP=2 + baseline). Once CP > 1, broadcasting the full BatchedDataDict to + (TP, CP, PP) siblings amortizes the TQ read. 
+ + mcore exposes per-axis groups (``get_tensor_model_parallel_group``, + ``get_context_parallel_group``, ``get_pipeline_model_parallel_group``) + but no single combined group. We build the combined NCCL group + once on first call by enumerating coordinates that share this + rank's ``dp_rank``. + """ + if not torch.distributed.is_initialized(): + return None + cached = getattr(self, "_replica_group_cache", "uninit") + if cached != "uninit": + return cached + + cp = parallel_state.get_context_parallel_world_size() + if cp <= 1: + self._replica_group_cache = None + return None + + world_size = torch.distributed.get_world_size() + my_dp_rank = parallel_state.get_data_parallel_rank() + # Collect global ranks that share this DP rank — they form the + # replica group. Done collectively so every rank ends up with + # the same ranks list and can pass it to new_group(). + my_replica_ranks_t = torch.full( + (world_size,), + -1, + dtype=torch.long, + device="cuda", + ) + my_replica_ranks_t[torch.distributed.get_rank()] = my_dp_rank + torch.distributed.all_reduce( + my_replica_ranks_t, op=torch.distributed.ReduceOp.MAX + ) + all_dp_ranks = my_replica_ranks_t.tolist() + + # Every (dp_rank → ranks) bucket must call new_group on its own + # ranks list, but new_group itself must be called collectively + # across the full world. Sort by dp_rank to keep call order + # consistent across processes. + groups: dict[int, Any] = {} + for dp in sorted(set(all_dp_ranks)): + ranks = [r for r, d in enumerate(all_dp_ranks) if d == dp] + grp = torch.distributed.new_group(ranks=ranks, backend="nccl") + groups[dp] = grp + self._replica_group_cache = groups[my_dp_rank] + return self._replica_group_cache + def __init__( self, config: PolicyConfig, diff --git a/nemo_rl/utils/venvs.py b/nemo_rl/utils/venvs.py index 667a45a9f1..9e435d125c 100644 --- a/nemo_rl/utils/venvs.py +++ b/nemo_rl/utils/venvs.py @@ -186,3 +186,35 @@ def create_local_venv_on_each_node(py_executable: str, venv_name: str): ray.util.remove_placement_group(pg) # Return mapping from node IP to venv python path return paths[0] + + +def make_actor_runtime_env(actor_class_fqn: str) -> dict: + """Build a Ray ``runtime_env`` for one of our registered actors. + + Resolves the actor's tier-specific py_executable via the registry, + materializes a per-node venv when uv-managed, and packages it with + ``VIRTUAL_ENV`` / ``UV_PROJECT_ENVIRONMENT`` env vars so workers see + the same interpreter as the driver. + + Used by ReplayBuffer, AsyncTrajectoryCollector, and + SyncRolloutActor — three actors that need the VLLM tier's + venv on every node. + """ + # Local import — venvs.py is dep-light; the registry imports + # PY_EXECUTABLES which transitively pulls heavier deps. 
+ from nemo_rl.distributed.ray_actor_environment_registry import ( + get_actor_python_env, + ) + + py_exec = get_actor_python_env(actor_class_fqn) + if py_exec.startswith("uv"): + py_exec = create_local_venv_on_each_node(py_exec, actor_class_fqn) + venv = os.path.dirname(os.path.dirname(py_exec)) # strip bin/python + return { + "py_executable": py_exec, + "env_vars": { + **os.environ, + "VIRTUAL_ENV": venv, + "UV_PROJECT_ENVIRONMENT": venv, + }, + } diff --git a/pyproject.toml b/pyproject.toml index 09daca3267..fc8a6ac065 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,8 +2,8 @@ requires = ["setuptools>=42", "wheel>=0.46.2"] build-backend = "setuptools.build_meta" -[tool.setuptools] -packages = ["nemo_rl"] +[tool.setuptools.packages.find] +include = ["nemo_rl*"] [tool.setuptools.dynamic] version = { attr = "nemo_rl.__version__" } # any module attribute compatible with ast.literal_eval @@ -61,6 +61,29 @@ dependencies = [ "cuda-bindings; sys_platform != 'darwin'", # for non-colocated refit "pybase64", # for sglang refit "nvidia-cudnn-cu13==9.20.0.48; sys_platform != 'darwin'", # for transformer-engine no build isolation + # Data-plane stack — promoted to base so worker venvs (built by + # nemo_rl.utils.venvs.create_local_venv via bare `uv sync`, no extras) + # automatically include them. Removes the need for a `[data-plane]` + # extra and the corresponding plumbing in the per-worker venv builder. + "tensordict", + # Pinned to b266d39 (post-0.1.6, pre-0.1.7) for PR #77's MooncakeStore + # refactor: `clear` switched from unanchored `remove_by_regex` to + # exact-key `batch_remove`, which fixes a collateral-key-deletion bug + # that breaks DAPO + mooncake_cpu. Bump to the 0.1.7 tag when released. + "TransferQueue @ git+https://github.com/Ascend/TransferQueue.git@b266d39", + # Backs data_plane.backend="mooncake_cpu". Default backend is "simple" + # (in-process), but the mooncake_cpu path needs the `mooncake_master` + # binary that ships in this wheel at /mooncake/. Bundled + # with TQ rather than gated behind an extra so worker venvs (built + # without extras) can be flipped to mooncake_cpu via config alone. + # PyPI's `mooncake-transfer-engine` is cu12-only (links libcudart.so.12), + # which breaks on cu13 containers ("libcudart.so.12: cannot open shared + # object file"). Upstream ships a cu13 variant as a GitHub release + # asset under a separate distribution name `mooncake-transfer-engine-cuda13`; + # same `mooncake/` import namespace, store.so linked against + # libcudart.so.13. Pin the GitHub URL directly (same pattern as + # flash-attn below). Drop and revert to PyPI when cu13 is promoted. + "mooncake-transfer-engine-cuda13 @ https://github.com/kvcache-ai/Mooncake/releases/download/v0.3.10.post2/mooncake_transfer_engine_cuda13-0.3.10.post2-cp313-cp313-manylinux_2_35_x86_64.whl ; sys_platform == 'linux' and platform_machine == 'x86_64'", ] [project.optional-dependencies] @@ -299,6 +322,11 @@ override-dependencies = [ "outlines>=0.2.0", # Upgrade pytest to 9.0.3 "pytest>=9.0.3", + # TransferQueue (data-plane extra) pins numpy<2.0.0; megatron-core needs + # numpy>=2.1.0 via onnx → ml-dtypes. Override globally so the data-plane + # extra composes with mcore/automodel without version-mirroring TQ's + # requirements.txt. Forward-compatible across TQ minor bumps. 
+ "numpy>=2.1.0", ] # CVE fixes diff --git a/pyrefly.toml b/pyrefly.toml index d79920b67e..4d14b6d46b 100644 --- a/pyrefly.toml +++ b/pyrefly.toml @@ -91,6 +91,18 @@ project-includes = [ "nemo_rl/data/multimodal_utils.py", "nemo_rl/data/packing/__init__.py", "nemo_rl/data/processors.py", + "nemo_rl/data_plane/__init__.py", + "nemo_rl/data_plane/adapters/__init__.py", + "nemo_rl/data_plane/adapters/noop.py", + "nemo_rl/data_plane/adapters/transfer_queue.py", + "nemo_rl/data_plane/codec.py", + "nemo_rl/data_plane/column_io.py", + "nemo_rl/data_plane/factory.py", + "nemo_rl/data_plane/interfaces.py", + "nemo_rl/data_plane/observability.py", + "nemo_rl/data_plane/preshard.py", + "nemo_rl/data_plane/schema.py", + "nemo_rl/data_plane/worker_mixin.py", "nemo_rl/distributed/__init__.py", "nemo_rl/distributed/collectives.py", "nemo_rl/distributed/named_sharding.py", diff --git a/tests/data_plane/__init__.py b/tests/data_plane/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/data_plane/conftest.py b/tests/data_plane/conftest.py new file mode 100644 index 0000000000..5618469b02 --- /dev/null +++ b/tests/data_plane/conftest.py @@ -0,0 +1,33 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Shared fixtures for data-plane tests. + +Deliberately slim. The parent ``tests/unit/conftest.py`` drags in +``mlflow``, ``torch.distributed``, ``init_ray`` etc. — none of which are +needed for data-plane Tier 1 tests. Per the test plan §11 we keep our +conftest local and minimal so unit tests run in a slim venv (torch + +tensordict + pytest only). +""" + +from __future__ import annotations + +import pathlib + +import pytest + + +@pytest.fixture(scope="session") +def repo_root() -> pathlib.Path: + """Absolute path to the repo root (computed from this file's location).""" + return pathlib.Path(__file__).resolve().parents[2] diff --git a/tests/data_plane/functional/__init__.py b/tests/data_plane/functional/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/data_plane/functional/conftest.py b/tests/data_plane/functional/conftest.py new file mode 100644 index 0000000000..02fd766231 --- /dev/null +++ b/tests/data_plane/functional/conftest.py @@ -0,0 +1,69 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tier 2 (functional) fixtures — Ray + transfer_queue, single-node, no GPU.""" + +from __future__ import annotations + +import uuid + +import pytest + + +@pytest.fixture +def ray_namespace() -> str: + """Per-test Ray namespace so xdist-style parallel runs don't collide.""" + return f"dp-test-{uuid.uuid4().hex[:8]}" + + +@pytest.fixture +def ray_session(ray_namespace): + """Init Ray with a unique namespace; tear down after the test.""" + pytest.importorskip("ray") + pytest.importorskip("transfer_queue") + import ray + + if ray.is_initialized(): + ray.shutdown() + ray.init(namespace=ray_namespace, include_dashboard=False, log_to_driver=False) + try: + yield ray_namespace + finally: + if ray.is_initialized(): + ray.shutdown() + + +@pytest.fixture +def tq_simple_cfg(): + """Minimal SimpleStorage config for TQ functional tests.""" + return { + "enabled": True, + "impl": "transfer_queue", + "backend": "simple", + "storage_capacity": 1024, + "num_storage_units": 1, + } + + +def pytest_collection_modifyitems(config, items): + """If transfer_queue isn't installed, mark all tests in this dir + as skipped with a clear reason — no silent skip.""" + try: + import transfer_queue # noqa: F401 + except ImportError: + skip = pytest.mark.skip( + reason="transfer_queue not installed (it's a base dep — " + "try `uv sync` to refresh)" + ) + for item in items: + item.add_marker(skip) diff --git a/tests/data_plane/functional/test_seqpack_equivalence.py b/tests/data_plane/functional/test_seqpack_equivalence.py new file mode 100644 index 0000000000..a119a56325 --- /dev/null +++ b/tests/data_plane/functional/test_seqpack_equivalence.py @@ -0,0 +1,303 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Byte-level equivalence between legacy and TQ seqpack/dynbatch paths. + +Both paths share ``BatchedDataDict.shard_by_batch_size(shards=DP_world, +sequence_packing_args=...)`` for cross-DP balance (Option 1 fix). The only +implementation difference is data transport: legacy hands each shard's +tensors directly to the worker; TQ writes them into the queue, then the +worker reads them back. + +This test isolates the seqpack/dynbatch math from rollout sampling, NCCL +non-determinism, and optimizer steps. If it passes, the only remaining +sources of legacy-vs-TQ run-to-run divergence live outside NeMo-RL. + +Spec: + 1. Build a deterministic ``train_data`` with variable input lengths. + 2. Run ``shard_by_batch_size`` on the driver — this is the *one* call + both paths share. Save its output as the legacy reference. + 3. Round-trip each shard through TQ (``kv_batch_put`` → + ``kv_batch_get`` → ``materialize``) and re-attach the per-shard + packing metadata from ``extra_info`` (what + ``train_presharded`` does in production). + 4. Assert each rank's tensors and packing metadata are byte-identical + to the legacy reference. 
+""" + +from __future__ import annotations + +import os + +import pytest +import torch +from tensordict import TensorDict + +transfer_queue = pytest.importorskip("transfer_queue") # noqa: F841 + +from nemo_rl.data_plane import build_data_plane_client, materialize +from nemo_rl.distributed.batched_data_dict import BatchedDataDict + +# Mirror of the seed-field set in nemo_rl/algorithms/grpo_sync.py. +_DP_SEED_FIELDS = ( + "input_ids", + "input_lengths", + "generation_logprobs", + "prev_logprobs", + "reference_policy_logprobs", + "advantages", + "token_mask", + "sample_mask", +) + +# ── loud-skip helpers ───────────────────────────────────────────────────────── + +_REQUIRE_MOONCAKE = os.environ.get("NEMO_RL_REQUIRE_MOONCAKE") == "1" + + +def _mooncake_available() -> bool: + try: + import mooncake # noqa: F401 + except ImportError: + if _REQUIRE_MOONCAKE: + raise + return False + return True + + +# ── fixtures ────────────────────────────────────────────────────────────────── + + +def _make_tq_cfg(backend: str) -> dict: + return { + "enabled": True, + "impl": "transfer_queue", + "backend": backend, + "storage_capacity": 1024, + "num_storage_units": 1, + } + + +@pytest.fixture( + params=["simple", "mooncake_cpu"], + ids=["simple", "mooncake_cpu"], +) +def tq_client(request, ray_session): + """Parametrized fixture over simple and mooncake_cpu backends. + + mooncake_cpu is skipped when the mooncake wheel is not installed. + Set NEMO_RL_REQUIRE_MOONCAKE=1 to promote the skip to a loud failure. + + ray_session comes from tests/data_plane/functional/conftest.py. + """ + backend = request.param + if backend == "mooncake_cpu" and not _mooncake_available(): + pytest.skip( + "mooncake not installed — skipping mooncake_cpu seqpack equivalence " + "(set NEMO_RL_REQUIRE_MOONCAKE=1 to fail loud)" + ) + client = build_data_plane_client(_make_tq_cfg(backend)) + yield client + client.close() + + +def _make_fake_train_data( + n_samples: int = 64, + max_seqlen: int = 4096, + seed: int = 42, +) -> BatchedDataDict: + """Stand-in for GRPO ``train_data``. + + Variable lengths in ``[256, max_seqlen]`` so the bin packer actually + produces multiple bins per shard — flat-length data would trivially + match. + """ + g = torch.Generator().manual_seed(seed) + input_lengths = torch.randint(256, max_seqlen + 1, (n_samples,), generator=g) + input_ids = torch.zeros((n_samples, max_seqlen), dtype=torch.long) + for i in range(n_samples): + n = int(input_lengths[i]) + input_ids[i, :n] = torch.randint(1, 50000, (n,), generator=g) + return BatchedDataDict( + { + "input_ids": input_ids, + "input_lengths": input_lengths, + "advantages": torch.randn(n_samples, max_seqlen, generator=g), + "token_mask": torch.ones(n_samples, max_seqlen), + "sample_mask": torch.ones(n_samples), + "prev_logprobs": torch.randn(n_samples, max_seqlen, generator=g), + "reference_policy_logprobs": torch.randn( + n_samples, max_seqlen, generator=g + ), + "generation_logprobs": torch.randn(n_samples, max_seqlen, generator=g), + } + ) + + +def _round_trip_shards_through_tq( + tq_client, + pre_shards: list, + partition_id: str, +) -> list[BatchedDataDict]: + """Put each shard's seed fields to TQ, fetch back, attach packing metadata. + + This is the same dance the production driver+worker does: + ``grpo_sync.py`` builds per-rank metas and seeds TQ; ``train_presharded`` + fetches its slice and attaches ``extra_info`` packing metadata. 
+ """ + n_total = sum(int(s["sample_mask"].shape[0]) for s in pre_shards) + tq_client.register_partition( + partition_id=partition_id, + fields=list(_DP_SEED_FIELDS), + num_samples=n_total, + consumer_tasks=["train"], + ) + out: list[BatchedDataDict] = [] + for r, shard in enumerate(pre_shards): + n = int(shard["sample_mask"].shape[0]) + keys = [f"r{r}_s{i}" for i in range(n)] + names = [ + f + for f in _DP_SEED_FIELDS + if f in shard and isinstance(shard[f], torch.Tensor) + ] + fields = TensorDict( + {f: shard[f].detach().contiguous() for f in names}, + batch_size=[n], + ) + tq_client.kv_batch_put( + keys=keys, + partition_id=partition_id, + fields=fields, + ) + td_back = tq_client.kv_batch_get( + keys=keys, + partition_id=partition_id, + select_fields=list(names), + ) + bdd = materialize(td_back, layout="padded") + bdd.micro_batch_indices = shard.micro_batch_indices + bdd.micro_batch_lengths = shard.micro_batch_lengths + bdd.elem_counts_per_gb = shard.elem_counts_per_gb + out.append(bdd) + return out + + +def _assert_shards_byte_equal(legacy, recovered, *, expect_metadata: bool) -> None: + assert len(legacy) == len(recovered), ( + f"shard count mismatch: legacy={len(legacy)} tq={len(recovered)}" + ) + for r, (L, T) in enumerate(zip(legacy, recovered)): + L_tensor_keys = {k for k, v in L.data.items() if isinstance(v, torch.Tensor)} + # TQ only transmits _DP_SEED_FIELDS — non-seed legacy fields are + # out of scope for this test. + common = L_tensor_keys & set(_DP_SEED_FIELDS) + assert common <= set(T.data.keys()), ( + f"rank {r}: TQ shard missing seed fields {common - set(T.data.keys())}" + ) + for k in common: + assert L[k].shape == T[k].shape, ( + f"rank {r} field {k}: shape {L[k].shape} != {T[k].shape}" + ) + assert L[k].dtype == T[k].dtype, ( + f"rank {r} field {k}: dtype {L[k].dtype} != {T[k].dtype}" + ) + assert torch.equal(L[k], T[k]), f"rank {r} field {k}: byte-level mismatch" + if expect_metadata: + assert L.micro_batch_indices == T.micro_batch_indices, ( + f"rank {r} micro_batch_indices mismatch" + ) + assert L.micro_batch_lengths == T.micro_batch_lengths, ( + f"rank {r} micro_batch_lengths mismatch" + ) + assert L.elem_counts_per_gb == T.elem_counts_per_gb, ( + f"rank {r} elem_counts_per_gb mismatch" + ) + + +def test_seqpack_legacy_equals_tq(tq_client): + """Sequence packing: legacy shards == TQ-roundtripped shards (byte-level).""" + DP_WORLD = 4 + GBS = 64 + spa = { + "algorithm": "modified_first_fit_decreasing", + "input_key": "input_ids", + "input_lengths_key": "input_lengths", + "sequence_length_pad_multiple": 64, + "max_tokens_per_microbatch": 4096, + } + data = _make_fake_train_data(n_samples=GBS) + + legacy_shards, _ = data.shard_by_batch_size( + DP_WORLD, + batch_size=GBS, + sequence_packing_args=spa, + ) + tq_pre_shards, _ = data.shard_by_batch_size( + DP_WORLD, + batch_size=GBS, + sequence_packing_args=spa, + ) + recovered = _round_trip_shards_through_tq( + tq_client, + tq_pre_shards, + partition_id="seqpack-eq", + ) + _assert_shards_byte_equal(legacy_shards, recovered, expect_metadata=True) + + +def test_dynbatch_legacy_equals_tq(tq_client): + """Dynamic batching: same equivalence claim as seqpack.""" + DP_WORLD = 4 + GBS = 64 + dba = { + "input_key": "input_ids", + "input_lengths_key": "input_lengths", + "sequence_length_round": 64, + "max_tokens_per_microbatch": 4096, + } + data = _make_fake_train_data(n_samples=GBS) + + legacy_shards, _ = data.shard_by_batch_size( + DP_WORLD, + batch_size=GBS, + dynamic_batching_args=dba, + ) + tq_pre_shards, _ = 
data.shard_by_batch_size( + DP_WORLD, + batch_size=GBS, + dynamic_batching_args=dba, + ) + recovered = _round_trip_shards_through_tq( + tq_client, + tq_pre_shards, + partition_id="dynbatch-eq", + ) + _assert_shards_byte_equal(legacy_shards, recovered, expect_metadata=True) + + +def test_no_packing_legacy_equals_tq(tq_client): + """Sanity: even without packing/dynbatch the transport should be lossless.""" + DP_WORLD = 4 + GBS = 64 + data = _make_fake_train_data(n_samples=GBS) + + legacy_shards = data.shard_by_batch_size(DP_WORLD, batch_size=GBS) + tq_pre_shards = data.shard_by_batch_size(DP_WORLD, batch_size=GBS) + recovered = _round_trip_shards_through_tq( + tq_client, + tq_pre_shards, + partition_id="nopack-eq", + ) + # No packing → no micro_batch_* metadata to compare. + _assert_shards_byte_equal(legacy_shards, recovered, expect_metadata=False) diff --git a/tests/data_plane/functional/test_tq_lifecycle.py b/tests/data_plane/functional/test_tq_lifecycle.py new file mode 100644 index 0000000000..b09adae299 --- /dev/null +++ b/tests/data_plane/functional/test_tq_lifecycle.py @@ -0,0 +1,355 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Single-node TQ smoke — Stage 1 acceptance. + +Mirrors the recipe in the integration plan §3 / Stage 1: +register → put → claim_meta → get_data → check_consumption → clear. + +Skipped when the ``transfer_queue`` package is not installed so CI without +the data-plane extra still passes. +""" + +from __future__ import annotations + +import os + +import numpy as np +import pytest +import torch +from tensordict import TensorDict + +transfer_queue = pytest.importorskip("transfer_queue") # noqa: F841 + +from tensordict import NonTensorStack + +from nemo_rl.data_plane import build_data_plane_client +from nemo_rl.data_plane.column_io import read_columns +from nemo_rl.data_plane.interfaces import KVBatchMeta + +# ── loud-skip helpers ───────────────────────────────────────────────────────── + +_REQUIRE_MOONCAKE = os.environ.get("NEMO_RL_REQUIRE_MOONCAKE") == "1" + + +def _mooncake_available() -> bool: + try: + import mooncake # noqa: F401 + except ImportError: + if _REQUIRE_MOONCAKE: + raise + return False + return True + + +# ── fixtures ────────────────────────────────────────────────────────────────── + + +@pytest.fixture +def tq_client(): + import ray + + if not ray.is_initialized(): + ray.init(local_mode=False, include_dashboard=False) + + client = build_data_plane_client( + { + "enabled": True, + "impl": "transfer_queue", + "backend": "simple", + "storage_capacity": 1024, + "num_storage_units": 1, + } + ) + yield client + client.close() + + +@pytest.fixture( + params=["simple", "mooncake_cpu"], + ids=["simple", "mooncake_cpu"], +) +def tq_client_backends(request): + """Parametrized fixture over simple and mooncake_cpu backends. + + mooncake_cpu is skipped when the mooncake wheel is not installed. + Set NEMO_RL_REQUIRE_MOONCAKE=1 to promote the skip to a loud failure. 
+ """ + backend = request.param + if backend == "mooncake_cpu" and not _mooncake_available(): + pytest.skip( + "mooncake not installed — skipping mooncake_cpu backend " + "(set NEMO_RL_REQUIRE_MOONCAKE=1 to fail loud)" + ) + + import ray + + if not ray.is_initialized(): + ray.init(local_mode=False, include_dashboard=False) + + client = build_data_plane_client( + { + "enabled": True, + "impl": "transfer_queue", + "backend": backend, + "storage_capacity": 1024, + "num_storage_units": 1, + } + ) + yield client + client.close() + + +def test_smoke_round_trip(tq_client) -> None: + tq_client.register_partition( + partition_id="smoke", + fields=["x"], + num_samples=4, + consumer_tasks=["read"], + ) + keys = ["a", "b", "c", "d"] + tq_client.kv_batch_put( + keys=keys, + partition_id="smoke", + fields=TensorDict({"x": torch.arange(4)}, batch_size=[4]), + ) + + meta = tq_client.claim_meta( + partition_id="smoke", + task_name="read", + required_fields=["x"], + batch_size=4, + timeout_s=30.0, + ) + assert meta.size == 4 + + data = tq_client.get_data(meta) + # Order may differ from input — match against the meta's keys. + expected = torch.tensor([keys.index(k) for k in meta.keys]) + assert torch.equal(data["x"], expected) + + assert tq_client.check_consumption_status("smoke", ["read"]) + + tq_client.kv_clear(keys=None, partition_id="smoke") + + +def test_smoke_round_trip_backends(tq_client_backends) -> None: + """Smoke round-trip parameterized over both backends. + + Covers P5 (T2-backend-bytewise-equal) — the same put/get lifecycle must + work on simple and mooncake_cpu. mooncake_cpu is skipped when unavailable. + """ + client = tq_client_backends + client.register_partition( + partition_id="smoke-backend", + fields=["x"], + num_samples=4, + consumer_tasks=["read"], + ) + keys = ["a", "b", "c", "d"] + client.kv_batch_put( + keys=keys, + partition_id="smoke-backend", + fields=TensorDict({"x": torch.arange(4)}, batch_size=[4]), + ) + + meta = client.claim_meta( + partition_id="smoke-backend", + task_name="read", + required_fields=["x"], + batch_size=4, + timeout_s=30.0, + ) + assert meta.size == 4 + + data = client.get_data(meta) + expected = torch.tensor([keys.index(k) for k in meta.keys]) + assert torch.equal(data["x"], expected) + + client.kv_clear(keys=None, partition_id="smoke-backend") + + +def test_smoke_round_trip_1d_fields(tq_client) -> None: + """A 1D (N,) tensor put into TQ must come back as (N,), not (N,1). + + Regression guard for R-C2: TQ's KVStorageManager path silently unsqueezes + 1D fields. The adapter's `_promote_1d_leaves` + `_from_wire` pair fix + this for the mooncake_cpu backend; this test verifies simple backend does + not introduce the regression. + """ + n = 6 + reward = torch.arange(n, dtype=torch.float32) + + tq_client.register_partition( + partition_id="smoke-1d", + fields=["reward"], + num_samples=n, + consumer_tasks=["read"], + ) + keys = [f"k{i}" for i in range(n)] + tq_client.kv_batch_put( + keys=keys, + partition_id="smoke-1d", + fields=TensorDict({"reward": reward}, batch_size=[n]), + ) + + meta = tq_client.claim_meta( + partition_id="smoke-1d", + task_name="read", + required_fields=["reward"], + batch_size=n, + timeout_s=30.0, + ) + data = tq_client.get_data(meta) + + assert data["reward"].shape == reward.shape, ( + f"Expected shape {tuple(reward.shape)} for 1D field, " + f"got {tuple(data['reward'].shape)}. " + "TQ must not unsqueeze 1D tensors silently (R-C2)." 
+ ) + + tq_client.kv_clear(keys=None, partition_id="smoke-1d") + + +# ── Object-field round-trip across backends ─────────────────────────────────── +# +# Closes the coverage gap: prior tests exercised np.ndarray(object) only via +# the in-process codec (test_codec_object.py) or sent tensor-only fields +# through both backends (test_smoke_round_trip_backends). Sending object +# fields through mooncake_cpu was untested. This test covers that path. + + +def _object_payload(n: int) -> np.ndarray: + """Heterogeneous per-row Python objects, mimicking message_log shape.""" + rows = [ + { + "id": i, + "text": f"sample {i} content " * (i % 5 + 1), # variable-length strings + "tags": [f"t{i}", f"t{i + 1}"], + } + for i in range(n) + ] + arr = np.empty(n, dtype=object) + for i, r in enumerate(rows): + arr[i] = r + return arr + + +def test_object_round_trip_backends(tq_client_backends) -> None: + """np.ndarray(dtype=object) put → get → decode equality, both backends. + + Mirrors the wire used by ``SyncRolloutActor.kv_first_write`` for + ``message_log`` / ``content``: object fields ride as + ``NonTensorStack`` leaves (TQ-native non-tensor passthrough); + :func:`read_columns` → :func:`materialize` decodes them back to + ``np.ndarray(dtype=object)``. + """ + client = tq_client_backends + n = 8 + field_name = "msg_log" + keys = [f"obj_{i}" for i in range(n)] + + client.register_partition( + partition_id="obj-backend", + fields=[field_name], + num_samples=n, + consumer_tasks=["read"], + ) + client.kv_batch_put( + keys=keys, + partition_id="obj-backend", + fields=TensorDict( + {field_name: NonTensorStack(*_object_payload(n).tolist())}, + batch_size=[n], + ), + ) + meta = KVBatchMeta( + partition_id="obj-backend", + task_name="read", + keys=keys, + fields=[field_name], + ) + + bdd = read_columns(client, meta, select_fields=[field_name]) + + assert isinstance(bdd[field_name], np.ndarray) + assert bdd[field_name].dtype == object + assert bdd[field_name].shape == (n,) + expected = _object_payload(n) + for i in range(n): + assert bdd[field_name][i] == expected[i], ( + f"row {i} mismatch: got {bdd[field_name][i]!r}, expected {expected[i]!r}" + ) + + client.kv_clear(keys=None, partition_id="obj-backend") + + +def test_object_and_tensor_mixed_round_trip_backends(tq_client_backends) -> None: + """Mixed tensor + object fields in one put — exercises the actor's + real schema (tensors + object data side-by-side). + + Regression guard: object writes coexisting with tensor writes must + not corrupt either side. Co-fetch decodes the tensor via padding + and the ``NonTensorStack`` leaf via :func:`materialize` in one call. + """ + client = tq_client_backends + n = 6 + keys = [f"mx_{i}" for i in range(n)] + + client.register_partition( + partition_id="mix-backend", + fields=["ids", "lens", "msg"], + num_samples=n, + consumer_tasks=["read"], + ) + ids = torch.arange(n * 4, dtype=torch.long).reshape(n, 4) + lens = torch.full((n,), 4, dtype=torch.long) + msg = NonTensorStack(*_object_payload(n).tolist()) + + client.kv_batch_put( + keys=keys, + partition_id="mix-backend", + fields=TensorDict( + {"ids": ids, "lens": lens, "msg": msg}, + batch_size=[n], + ), + ) + + meta = KVBatchMeta( + partition_id="mix-backend", + task_name="read", + keys=keys, + fields=["ids", "lens", "msg"], + sequence_lengths=[4] * n, + ) + + # Read all three together — tensor fields decode via padding, + # object field decodes via NonTensorStack passthrough. 
+ bdd = read_columns(client, meta, select_fields=["ids", "lens", "msg"]) + assert torch.equal(bdd["ids"], ids) + assert torch.equal(bdd["lens"], lens) + expected = _object_payload(n) + for i in range(n): + assert bdd["msg"][i] == expected[i] + + # Read just the tensor. + only_ids = read_columns(client, meta, select_fields=["ids"]) + assert torch.equal(only_ids["ids"], ids) + assert "msg" not in only_ids + + # Read just the object. + only_msg = read_columns(client, meta, select_fields=["msg"]) + assert isinstance(only_msg["msg"], np.ndarray) + assert "ids" not in only_msg + + client.kv_clear(keys=None, partition_id="mix-backend") diff --git a/tests/data_plane/functional/test_tq_multinode.py b/tests/data_plane/functional/test_tq_multinode.py new file mode 100644 index 0000000000..9f5aea1146 --- /dev/null +++ b/tests/data_plane/functional/test_tq_multinode.py @@ -0,0 +1,98 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""2-node Slurm smoke — verifies controller-actor placement and ZMQ. + +Driver registers a partition, a producer Ray actor on a different node +puts data, the driver fetches and validates. Run via ``RL/ray.sub`` over +2 nodes (mirrors ``rl-arena/launch/run_arena.sh``). + +Skipped automatically when: + * ``transfer_queue`` is not installed, or + * the test is invoked on a single-node Ray cluster. 
+""" + +from __future__ import annotations + +import pytest +import torch +from tensordict import TensorDict + +transfer_queue = pytest.importorskip("transfer_queue") # noqa: F841 + + +def _ray_node_count() -> int: + import ray + + if not ray.is_initialized(): + return 0 + return len([n for n in ray.nodes() if n.get("Alive", False)]) + + +@pytest.mark.skipif(_ray_node_count() < 2, reason="requires a multi-node Ray cluster") +def test_multinode_round_trip() -> None: + import ray + + from nemo_rl.data_plane import build_data_plane_client + + driver = build_data_plane_client( + { + "enabled": True, + "impl": "transfer_queue", + "backend": "simple", + "storage_capacity": 1024, + "num_storage_units": 2, + } + ) + + try: + driver.register_partition( + partition_id="mn", + fields=["x"], + num_samples=4, + consumer_tasks=["read"], + ) + + @ray.remote(num_cpus=1) + def produce(keys: list[str]) -> None: + from nemo_rl.data_plane import build_data_plane_client + + actor_client = build_data_plane_client( + {"enabled": True, "impl": "transfer_queue", "backend": "simple"} + ) + try: + actor_client.kv_batch_put( + keys=keys, + partition_id="mn", + fields=TensorDict( + {"x": torch.arange(len(keys))}, batch_size=[len(keys)] + ), + ) + finally: + actor_client.close() + + ray.get(produce.remote(["a", "b", "c", "d"])) + + meta = driver.claim_meta( + partition_id="mn", + task_name="read", + required_fields=["x"], + batch_size=4, + timeout_s=60.0, + ) + assert meta.size == 4 + data = driver.get_data(meta) + assert int(data["x"].sum()) == 0 + 1 + 2 + 3 + finally: + driver.kv_clear(keys=None, partition_id="mn") + driver.close() diff --git a/tests/unit/data_plane/__init__.py b/tests/unit/data_plane/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/unit/data_plane/conftest.py b/tests/unit/data_plane/conftest.py new file mode 100644 index 0000000000..7cd80b1ff0 --- /dev/null +++ b/tests/unit/data_plane/conftest.py @@ -0,0 +1,14 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tier 1 (unit) fixtures — no Ray, no GPU, no transfer_queue.""" diff --git a/tests/unit/data_plane/test_architecture_invariants.py b/tests/unit/data_plane/test_architecture_invariants.py new file mode 100644 index 0000000000..e59e445862 --- /dev/null +++ b/tests/unit/data_plane/test_architecture_invariants.py @@ -0,0 +1,300 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Static architecture invariants — see test plan §4.8. + +Cheap regex-level tests. Run in milliseconds. Catch entire classes of +drift around the verl-style sibling-trainer split: + + * legacy ``grpo.py`` is fully untouched by the data plane, + * ``grpo_sync.py`` requires a TQPolicy with no feature-gate temptation, + * the production factory has no NoOp escape hatch, + * ``examples/run_grpo.py`` dispatches both trainers explicitly. + +Plan §4.8 was written assuming a ``train_from_dp_meta`` separate-method +design. We instead chose subclass-based polymorphism: ``TQPolicy`` +overrides ``Policy`` methods, and ``examples/run_grpo.py`` selects +which policy + trainer pair is constructed. +""" + +from __future__ import annotations + +import pathlib +import re + +import pytest + +REPO = pathlib.Path(__file__).resolve().parents[3] + + +def _read(rel: str) -> str: + return (REPO / rel).read_text() + + +def _strip_comments_and_docstrings(src: str) -> str: + """Best-effort cleaner so we don't false-positive on docstring text.""" + src = re.sub(r"#.*", "", src) + src = re.sub(r'""".*?"""', "", src, flags=re.DOTALL) + src = re.sub(r"'''.*?'''", "", src, flags=re.DOTALL) + return src + + +# ─── R-C9 — sync trainer engages the data plane (TQPolicy design) ──────── + + +def test_grpo_sync_engages_tq_policy(): + """Sync trainer must require a TQ-mediated policy. + + The TQ engagement is now encapsulated in + :class:`nemo_rl.models.policy.tq_policy.TQPolicy` — the trainer's job + is to enforce that the policy in hand actually carries the TQ + transport (``policy.dp_cfg`` is the public marker set by + ``TQPolicy.__init__``). Without this guard, a misconfiguration could + silently route through the legacy in-memory dispatch. + + The TQ wire-level constructs (``KVBatchMeta``, ``shard_meta_for_dp``, + ``build_data_plane_client``) belong inside ``tq_policy.py`` / + ``preshard.py``, not in the trainer. + """ + src = _strip_comments_and_docstrings(_read("nemo_rl/algorithms/grpo_sync.py")) + assert 'hasattr(policy, "dp_cfg")' in src or "hasattr(policy, 'dp_cfg')" in src, ( + "grpo_sync.py must guard on `hasattr(policy, 'dp_cfg')` so a " + "non-TQ Policy instance is rejected with a clear error." + ) + # TQ engagement happens through the policy's overridden methods — + # check that the chain reaches a real KVBatchMeta construction. + helper_src = _strip_comments_and_docstrings(_read("nemo_rl/data_plane/preshard.py")) + assert "KVBatchMeta(" in helper_src, ( + "preshard.py must still construct KVBatchMeta — TQPolicy " + "delegates here on each fan-out." + ) + tq_policy_src = _strip_comments_and_docstrings( + _read("nemo_rl/models/policy/tq_policy.py") + ) + assert "build_data_plane_client(" in tq_policy_src, ( + "TQPolicy must construct the data-plane client in __init__." + ) + + +def test_grpo_sync_requires_data_plane_enabled(): + """The sync trainer should hard-fail when invoked without the data + plane enabled — running it in legacy mode is a category error.""" + src = _strip_comments_and_docstrings(_read("nemo_rl/algorithms/grpo_sync.py")) + # Either a guard or a direct require — at minimum the error must be + # raised when enabled=False. + assert "raise ValueError" in src or "raise RuntimeError" in src, ( + "grpo_sync.py should raise when data_plane is not enabled." + ) + # And the failure message should name the legacy escape hatch so + # users can self-recover. + assert "grpo_train" in src or "grpo.py" in src, ( + "grpo_sync.py's enabled-required error should point users at the legacy trainer." 
+ ) + + +def test_no_feature_gate_pattern_in_either_trainer(): + """Catch the next 'just one if branch' temptation in *either* + trainer — the sibling-trainer split forbids cross-trainer + conditionals.""" + legacy = _strip_comments_and_docstrings(_read("nemo_rl/algorithms/grpo.py")) + sync = _strip_comments_and_docstrings(_read("nemo_rl/algorithms/grpo_sync.py")) + + # In the legacy trainer, ANY data_plane-conditional is wrong — + # legacy must not even know the data plane exists. + legacy_forbidden = [ + r"if\s+.*data_plane", + r"if\s+.*tq\b", + r"if\s+.*transfer_queue", + r"cfg\.get\([\"']data_plane", + r"master_config\[[\"']data_plane", + r"master_config\.get\([\"']data_plane", + ] + for pat in legacy_forbidden: + m = re.findall(pat, legacy) + assert not m, ( + f"legacy grpo.py reintroduced a data-plane gate: " + f"pattern {pat!r} matched {m}." + ) + + # In the sync trainer, an early "is enabled?" guard is allowed + # (we use one), but per-stage feature gates inside the loop are not. + # Heuristic: feature-gate guards inside an inner block tend to look + # like `if dp_client is not None:` after the early guard already + # raised. Allow the early guard once; warn on more. + n_dp_client_gates = len(re.findall(r"if\s+dp_client\s+is\s+not\s+None", sync)) + assert n_dp_client_gates == 0, ( + f"grpo_sync.py has {n_dp_client_gates} `if dp_client is not None` " + "guards. Sync trainer assumes the client is always present — " + "the existence check belongs at the top of the function only." + ) + + +# ─── R-C10 — factory rejects NoOp in production ────────────────────────── + + +def test_factory_does_not_construct_noop(): + """The production factory must not return a NoOp client. + + ``NoOpDataPlaneClient`` is test-only; importing it directly from + ``adapters/noop.py`` is fine in tests, but the factory has no + business handing it out. + """ + src = _read("nemo_rl/data_plane/factory.py") + # No import of NoOp from the factory. + assert "NoOpDataPlaneClient" not in src, ( + "factory.py imports/constructs NoOpDataPlaneClient. NoOp must " + "be reachable only via direct import from tests." + ) + # Disabled or unknown impl raises. + assert "raise ValueError" in src, ( + "factory.py must fail-fast on disabled or unknown impl." + ) + + +def test_factory_rejects_disabled_impl(): + """Factory must raise — not return None, not return a NoOp — when + the caller passes ``enabled=False``. The legacy trainer should not + call the factory at all.""" + src = _read("nemo_rl/data_plane/factory.py") + cleaned = _strip_comments_and_docstrings(src) + # The enabled-check should land before any impl dispatch. + assert re.search(r"enabled.*False|not.*enabled", cleaned), ( + "factory.py is missing an enabled-check. Disabled cfg must " + "fail-fast, not silently return a client." + ) + + +# ─── examples/run_grpo.py dispatches both trainers ─────────────────────── + + +def test_run_grpo_dispatches_both_trainers(): + """The example script must explicitly route between the two + trainers based on ``data_plane.enabled``.""" + src = _read("examples/run_grpo.py") + cleaned = _strip_comments_and_docstrings(src) + assert "grpo_train" in cleaned, "run_grpo.py must reference legacy grpo_train" + assert "grpo_train_sync" in cleaned, ( + "run_grpo.py must reference grpo_train_sync (the TQ-mediated trainer)" + ) + # Routing must read the data_plane config block somewhere — check + # against the original (un-stripped) source so we cover both inline + # access (`master_config["data_plane"]`) and `.get("data_plane")`. 
+ assert '"data_plane"' in src or "'data_plane'" in src, ( + 'run_grpo.py should read master_config["data_plane"] to dispatch.' + ) + assert re.search(r"\.get\(\s*[\"']enabled[\"']", cleaned), ( + "run_grpo.py should branch on the data-plane `enabled` flag." + ) + + +# ─── Legacy trainer must not import grpo_sync (one-way dependency) ─────── + + +def test_legacy_does_not_import_sync(): + """Dependency direction: ``grpo_sync.py`` imports helpers from + ``grpo.py``. The reverse must never hold or we'd recreate the + coupling we split.""" + legacy = _read("nemo_rl/algorithms/grpo.py") + assert "grpo_sync" not in legacy, ( + "legacy grpo.py imports from grpo_sync.py. The dependency " + "direction is one-way: sync imports legacy helpers, never " + "the other way around." + ) + + +# ─── pack_per_token_field export guard (commit 45f4ffb8) ───────────────────── + + +def test_pack_per_token_field_is_exported() -> None: + """pack_per_token_field must be importable from nemo_rl.data_plane.codec. + + Guards against silent deletion of the helper added in commit 45f4ffb8. + The function handles the qwen3 + TP + SP padding case where + val.shape[1] > max(lengths); maybe_pack_jagged is shape-strict and + cannot handle that. + """ + from nemo_rl.data_plane.codec import pack_per_token_field # noqa: F401 + + assert callable(pack_per_token_field), ( + "nemo_rl.data_plane.codec.pack_per_token_field must be callable. " + "It was added in commit 45f4ffb8 to handle SP-padded-wider write-backs." + ) + + +@pytest.mark.xfail( + strict=True, + reason=( + "pack_per_token_field defined in codec.py:151 but no callers — " + "wiring incomplete on this branch (45f4ffb8). " + "When wired, this test xpasses and someone removes the marker." + ), +) +def test_pack_per_token_field_is_wired_into_writeback() -> None: + """At least one of the three write-back call sites must import + pack_per_token_field. + + Known sites still using maybe_pack_jagged as of commit 45f4ffb8: + - nemo_rl/data_plane/worker_mixin.py:336 + - nemo_rl/data_plane/column_io.py:85 + - nemo_rl/experience/sync_rollout_actor.py:107 + + If this test FAILS (i.e., the xfail is not triggered), the SP-padded-wider + write-back regression (commit 45f4ffb8) is no longer guarded. + Wire `pack_per_token_field` into at least one of the three call sites to + make this test xpass, then remove the xfail marker. + """ + sites = [ + "nemo_rl/data_plane/worker_mixin.py", + "nemo_rl/data_plane/column_io.py", + "nemo_rl/experience/sync_rollout_actor.py", + ] + found_in_any = False + for rel_path in sites: + src = _read(rel_path) + if "pack_per_token_field" in src: + found_in_any = True + break + + assert found_in_any, ( + "None of the three write-back call sites reference pack_per_token_field:\n" + + "\n".join(f" {s}" for s in sites) + + "\nIf this fails, the SP-padded-wider write-back regression " + "(commit 45f4ffb8) is no longer guarded — wire `pack_per_token_field` " + "into one of the three call sites." + ) + + +# ─── ABC contract method names — catch silent renames ──────────────────── + + +@pytest.mark.parametrize( + "method", + [ + "register_partition", + "claim_meta", + "get_data", + "kv_batch_put", + "kv_batch_get", + "kv_clear", + "check_consumption_status", + "close", + ], +) +def test_abc_method_present(method): + """The DataPlaneClient ABC contract is the swap surface. 
Renaming + a method silently is a breaking change for every adapter.""" + src = _read("nemo_rl/data_plane/interfaces.py") + assert f"def {method}" in src, ( + f"DataPlaneClient ABC is missing required method {method!r}. " + f"This is a breaking change for every adapter (G2)." + ) diff --git a/tests/unit/data_plane/test_codec_jagged.py b/tests/unit/data_plane/test_codec_jagged.py new file mode 100644 index 0000000000..6fa8c1648b --- /dev/null +++ b/tests/unit/data_plane/test_codec_jagged.py @@ -0,0 +1,172 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Unit tests for the padded ↔ jagged codec bridge. + +Phase 1 of the wire-jagged plan: writer emits nested, reader pads on +demand. These tests cover the conversion helpers in isolation; e2e +parity is validated separately. +""" + +from __future__ import annotations + +import pytest +import torch +from tensordict import TensorDict + +from nemo_rl.data_plane.codec import ( + materialize, + response_from_nested, + to_nested_by_length, +) + + +def _padded(rows: list[list[int]], pad: int = 0) -> tuple[torch.Tensor, torch.Tensor]: + """Pad a list of int sequences to a rectangle; return (padded, lengths).""" + n = len(rows) + s = max(len(r) for r in rows) + out = torch.full((n, s), pad, dtype=torch.long) + lens = torch.tensor([len(r) for r in rows], dtype=torch.long) + for i, r in enumerate(rows): + out[i, : len(r)] = torch.tensor(r, dtype=torch.long) + return out, lens + + +# ── to_nested_by_length ─────────────────────────────────────────────── + + +def test_to_nested_by_length_strips_padding() -> None: + """The right-pad columns must NOT be in the nested output.""" + padded, lens = _padded([[1, 2, 3], [4, 5], [6, 7, 8, 9]], pad=0) + nested = to_nested_by_length(padded, lens) + assert nested.is_nested + rows = list(nested.unbind()) + assert torch.equal(rows[0], torch.tensor([1, 2, 3])) + assert torch.equal(rows[1], torch.tensor([4, 5])) + assert torch.equal(rows[2], torch.tensor([6, 7, 8, 9])) + + +def test_to_nested_by_length_preserves_dtype() -> None: + """bf16 in → bf16 out.""" + padded = torch.randn((3, 5), dtype=torch.bfloat16) + lens = torch.tensor([2, 4, 5], dtype=torch.long) + nested = to_nested_by_length(padded, lens) + assert nested.dtype == torch.bfloat16 + + +def test_to_nested_by_length_rejects_shape_mismatch() -> None: + padded = torch.zeros((3, 4)) + bad_lens = torch.tensor([1, 2]) # only 2, not 3 + with pytest.raises(ValueError, match=r"lengths shape"): + to_nested_by_length(padded, bad_lens) + + +def test_to_nested_by_length_rejects_1d_input() -> None: + with pytest.raises(ValueError, match=r"\(N, S"): + to_nested_by_length(torch.zeros(5), torch.tensor([5])) + + +# ── materialize: jagged → padded ────────────────────────────────────── + + +def test_materialize_pads_nested_with_field_specific_pad_value() -> None: + """Token field padded with pad_token_id; mask padded with 0. 
+ + This is the contract worker code expects: the padded view it + receives looks identical to a rectangular tensor produced by + batched_message_log_to_flat_message. + """ + ids_padded, lens = _padded([[10, 20, 30], [40, 50], [60, 70, 80, 90]], pad=0) + mask_padded, _ = _padded([[1, 1, 1], [1, 1], [1, 1, 1, 1]], pad=0) + ids_nested = to_nested_by_length(ids_padded, lens) + mask_nested = to_nested_by_length(mask_padded, lens) + + td = TensorDict( + {"input_ids": ids_nested, "token_mask": mask_nested}, + batch_size=[3], + ) + + bdd = materialize( + td, + layout="padded", + pad_value_dict={"input_ids": 999, "token_mask": 0}, + ) + + # Tokens are padded with the requested ID, not 0. + assert bdd["input_ids"].shape == (3, 4) + assert bdd["input_ids"][0, 3].item() == 999 # row 0 needs 1 pad + assert bdd["input_ids"][1, 2].item() == 999 # row 1 needs 2 pads + assert bdd["input_ids"][1, 3].item() == 999 + assert bdd["input_ids"][2, 3].item() == 90 # row 2 needs no padding + + # Mask uses the default 0 — match the source. + assert bdd["token_mask"].shape == (3, 4) + assert bdd["token_mask"][0, 3].item() == 0 + assert bdd["token_mask"][2, 3].item() == 1 + + +def test_materialize_passes_through_rectangular_tensors() -> None: + """Already-padded fields are emitted unchanged (no spurious copy).""" + rect = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.long) + td = TensorDict({"sample_mask": rect}, batch_size=[2]) + bdd = materialize(td, layout="padded") + assert torch.equal(bdd["sample_mask"], rect) + + +def test_materialize_jagged_layout_passes_nested_through() -> None: + """``layout='jagged'`` is the path for callers that consume nested.""" + padded, lens = _padded([[1, 2], [3, 4, 5]], pad=0) + nested = to_nested_by_length(padded, lens) + td = TensorDict({"x": nested}, batch_size=[2]) + bdd = materialize(td, layout="jagged") + assert bdd["x"].is_nested + + +def test_materialize_default_pad_value_is_zero() -> None: + """No pad_value_dict → fields pad with 0.""" + padded, lens = _padded([[1, 2, 3], [4]], pad=0) + nested = to_nested_by_length(padded, lens) + td = TensorDict({"x": nested}, batch_size=[2]) + bdd = materialize(td, layout="padded") + assert bdd["x"][1, 1].item() == 0 + assert bdd["x"][1, 2].item() == 0 + + +# ── response_from_nested ────────────────────────────────────────────── + + +def test_response_from_nested_extracts_response_slice() -> None: + """Worker write-back path: jagged (prompt+response) → response only. + + With the verl convention, output position i corresponds to predicting + input token i+1 — so the slice is left-shifted by one. 
+ """ + # Two samples: prompt_len=2, resp_len=3 / prompt_len=1, resp_len=2 + full_rows = [ + torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5]), # prompt 0,1; resp 2,3,4 + torch.tensor([1.1, 1.2, 1.3]), # prompt 0; resp 1,2 + ] + full = torch.nested.as_nested_tensor(full_rows, layout=torch.jagged) + resp_mask_rows = [ + torch.tensor([1.0, 1.0, 1.0]), # response_len = 3 + torch.tensor([1.0, 1.0]), # response_len = 2 + ] + response_mask = torch.nested.as_nested_tensor(resp_mask_rows, layout=torch.jagged) + + out = response_from_nested(full, response_mask) + assert out.is_nested + rows = list(out.unbind()) + # Row 0: full has 5 tokens; resp_len=3 → values[5-3-1:5-1] = values[1:4] = [0.2, 0.3, 0.4] + assert torch.allclose(rows[0], torch.tensor([0.2, 0.3, 0.4])) + # Row 1: full has 3 tokens; resp_len=2 → values[3-2-1:3-1] = values[0:2] = [1.1, 1.2] + assert torch.allclose(rows[1], torch.tensor([1.1, 1.2])) diff --git a/tests/unit/data_plane/test_codec_mooncake.py b/tests/unit/data_plane/test_codec_mooncake.py new file mode 100644 index 0000000000..22d03a4554 --- /dev/null +++ b/tests/unit/data_plane/test_codec_mooncake.py @@ -0,0 +1,131 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Unit tests for the mooncake_cpu-specific wire workarounds. + +Covers: + P1 — `promote_1d` round-trip: writer unsqueezes 1D → (N,1), reader squeezes back. + P2 — pack_per_token_field: tolerates SP padding wider than max(lengths). + +No Ray, no GPU, no transfer_queue required. +""" + +from __future__ import annotations + +import torch + +# ── P1: promote_1d — writer unsqueezes, reader squeezes ────────────────────── + + +def test_promote_1d_leaves_unsqueezes_1d() -> None: + """`_promote_1d_leaves` turns 1D ``(N,)`` leaves into ``(N, 1)``. + + Guards the mooncake_cpu path where TQ's extract_field_schema silently + unsqueezes 1D fields in metadata; the wire layer pre-unsqueezes so the + per-row data shape matches the metadata-recorded shape. + """ + from tensordict import TensorDict + + from nemo_rl.data_plane.adapters.transfer_queue import _promote_1d_leaves + + n = 8 + t = torch.arange(n, dtype=torch.float32) + td = TensorDict({"reward": t}, batch_size=[n]) + + out = _promote_1d_leaves(td) + assert out["reward"].shape == (n, 1), ( + f"Expected wire shape ({n}, 1) but got {tuple(out['reward'].shape)}." 
+ ) + + +def test_promote_1d_roundtrip_via_from_wire() -> None: + """`_promote_1d_leaves` then `_from_wire` restores the original ``(N,)`` shape and values.""" + from tensordict import TensorDict + + from nemo_rl.data_plane.adapters.transfer_queue import ( + _from_wire, + _promote_1d_leaves, + ) + + n = 6 + original = torch.arange(n, dtype=torch.float32) + td = TensorDict({"reward": original}, batch_size=[n]) + + wire = _promote_1d_leaves(td) + assert wire["reward"].shape == (n, 1) + + back = _from_wire(wire) + assert back["reward"].shape == (n,) + assert torch.equal(back["reward"], original) + + +# ── P2: pack_per_token_field — tolerates SP padding ────────────────────────── + + +def test_pack_per_token_field_truncates_sp_padding() -> None: + """pack_per_token_field slices each row to its own length, dropping SP padding. + + mcore SP rounds the forward output's seq dim up to a multiple of TP, so + val.shape[1] > max(lengths). maybe_pack_jagged would skip this field + (wrong shape); pack_per_token_field handles it correctly. + """ + from nemo_rl.data_plane.codec import pack_per_token_field + + n, max_len, sp_extra = 4, 8, 3 # val is wider by sp_extra tokens + lengths = torch.tensor([3, 5, 7, 4], dtype=torch.long) + assert lengths.max().item() == max_len - 1 # max_len=8 > max(lengths)=7 + val = torch.randn(n, max_len + sp_extra) # (4, 11) + + out = pack_per_token_field(val, lengths) + + assert out.is_nested, "pack_per_token_field must produce a nested tensor." + rows = list(out.unbind()) + assert len(rows) == n + for i, row in enumerate(rows): + expected_len = int(lengths[i].item()) + assert row.shape == (expected_len,), ( + f"Row {i}: expected length {expected_len}, got {tuple(row.shape)}. " + "SP padding tail was not dropped." + ) + assert torch.equal(row, val[i, :expected_len]), ( + f"Row {i}: values differ after truncation." + ) + + +def test_pack_per_token_field_exact_fit_equals_maybe_pack_jagged() -> None: + """When val.shape[1] == max(lengths), pack_per_token_field and + maybe_pack_jagged produce identical jagged outputs. + + This is the 'no SP padding' case — the two helpers must agree when + the input is already exactly the right width. + """ + from nemo_rl.data_plane.codec import maybe_pack_jagged, pack_per_token_field + + n = 4 + lengths = torch.tensor([3, 5, 2, 4], dtype=torch.long) + max_len = int(lengths.max().item()) + val = torch.randn(n, max_len) + + out_pack = pack_per_token_field(val, lengths) + out_maybe = maybe_pack_jagged(val, lengths) + + assert out_pack.is_nested + assert out_maybe.is_nested + + rows_pack = list(out_pack.unbind()) + rows_maybe = list(out_maybe.unbind()) + for i, (rp, rm) in enumerate(zip(rows_pack, rows_maybe)): + assert torch.equal(rp, rm), ( + f"Row {i} differs between pack_per_token_field and maybe_pack_jagged " + "on an exact-fit input." + ) diff --git a/tests/unit/data_plane/test_codec_object.py b/tests/unit/data_plane/test_codec_object.py new file mode 100644 index 0000000000..8f55b6ee50 --- /dev/null +++ b/tests/unit/data_plane/test_codec_object.py @@ -0,0 +1,59 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Unit tests for non-tensor passthrough on the wire. + +Object fields ride the wire as ``NonTensorStack`` leaves (TQ-native); +``materialize`` decodes them back to ``np.ndarray(dtype=object)`` for +the trainer. +""" + +from __future__ import annotations + +import numpy as np +import torch +from tensordict import NonTensorStack, TensorDict + +from nemo_rl.data_plane.codec import materialize, to_nested_by_length + + +def test_materialize_decodes_nontensor_stack() -> None: + """``NonTensorStack`` leaves are decoded back to ``np.ndarray(object)``. + + Tensor fields in the same TensorDict are still padded as before — + object support is per-field, not all-or-nothing. + """ + ids_padded = torch.tensor( + [[10, 20, 30, 0], [40, 50, 0, 0], [60, 70, 80, 90]], dtype=torch.long + ) + lens = torch.tensor([3, 2, 4], dtype=torch.long) + ids_nested = to_nested_by_length(ids_padded, lens) + msg = NonTensorStack({"id": 0}, {"id": 1}, {"id": 2}) + + td = TensorDict( + {"input_ids": ids_nested, "message_log": msg}, + batch_size=[3], + ) + + bdd = materialize( + td, + layout="padded", + pad_value_dict={"input_ids": 999}, + ) + + # Tensor field padded with 999 as usual. + assert bdd["input_ids"][1, 2].item() == 999 + # Object field comes back as np.ndarray(object). + assert isinstance(bdd["message_log"], np.ndarray) + assert bdd["message_log"].dtype == object + assert [d["id"] for d in bdd["message_log"]] == [0, 1, 2] diff --git a/tests/unit/data_plane/test_codec_wire_stripped.py b/tests/unit/data_plane/test_codec_wire_stripped.py new file mode 100644 index 0000000000..208398f1e0 --- /dev/null +++ b/tests/unit/data_plane/test_codec_wire_stripped.py @@ -0,0 +1,117 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Regression tests for the wire-stripped ``NonTensorStack`` path. + +TQ's simple-backend ``MsgpackEncoder._encode_tensordict`` serializes any +``TensorDictBase`` via ``dict(obj.items())`` — only the tensor backing +dict. ``NonTensorData`` stores its payload in ``_non_tensordict["data"]``, +so it round-trips through ZMQ as an empty +``TensorDict({}, batch_size=[])`` — the string payload is silently +dropped. The simple-backend storage manager's ``_pack_field_values`` +then assembles those stripped TDs into a ``NonTensorStack`` that +``materialize`` has to defend against. The pre-fix path crashed with +``RuntimeError: generator raised StopIteration``. + +Construction note: ``tensordict>=0.12.2`` rejects +``NonTensorStack(TensorDict({}, batch_size=[]), ...)`` at construction +time (``All tensordicts must be non-tensors``). 
To validate +``materialize``'s decode without skirting tensordict's invariants we: + +* test :func:`unwrap_wire_stripped_payload` directly — pure per-item + helper, accepts the wire-stripped ``TensorDict`` shape without + needing the stack constructor at all; +* drive :func:`materialize` end-to-end by patching ``.tolist()`` on a + constructed (valid) ``NonTensorStack`` so it returns the wire-stripped + items list — preserves the data-in / data-out contract while routing + around the constructor's homogeneity check. +""" + +from __future__ import annotations + +from unittest.mock import patch + +import numpy as np +from tensordict import NonTensorData, NonTensorStack, TensorDict + +from nemo_rl.data_plane.codec import materialize, unwrap_wire_stripped_payload + +# ── unwrap_wire_stripped_payload — direct per-item coverage ─────────── + + +def test_unwrap_wire_stripped_payload_empty_td_to_none() -> None: + """An empty ``TensorDict`` (batch_dims=0, no keys) → ``None``.""" + assert unwrap_wire_stripped_payload(TensorDict({}, batch_size=[])) is None + + +def test_unwrap_wire_stripped_payload_real_nontensor_data_passes_through() -> None: + """A live ``NonTensorData`` payload survives unwrap.""" + assert unwrap_wire_stripped_payload(NonTensorData(data="hello")) == "hello" + + +# ── materialize — end-to-end with the wire-stripped tolist shape ────── + + +def _valid_stack(n: int) -> NonTensorStack: + """A real ``NonTensorStack`` we can patch ``.tolist()`` on. + + Contents are irrelevant — ``materialize`` only iterates the items + returned by ``tolist()``, which we override below. + """ + return NonTensorStack(*(NonTensorData(data=None) for _ in range(n))) + + +def test_materialize_handles_wire_stripped_nontensor_stack() -> None: + """A stack of empty TDs materializes to an object array of ``None``.""" + items = [TensorDict({}, batch_size=[]) for _ in range(4)] + stack = _valid_stack(4) + with patch.object(stack, "tolist", return_value=items): + td = TensorDict({"content": stack}, batch_size=[4]) + bdd = materialize(td, layout="padded") + + arr = bdd["content"] + assert isinstance(arr, np.ndarray) + assert arr.dtype == object + assert arr.shape == (4,) + assert list(arr) == [None, None, None, None] + + +def test_materialize_preserves_real_nontensor_data() -> None: + """Real ``NonTensorStack`` of strings materializes to the raw strings. + + Guards against the wire-stripped fix accidentally substituting + ``None`` for legitimate string content (the happy path that + Mooncake's pickle wire and the patched simple-backend wire produce). + """ + real = NonTensorStack( + NonTensorData(data="hello"), + NonTensorData(data="world"), + NonTensorData(data="!"), + ) + td = TensorDict({"content": real}, batch_size=[3]) + + bdd = materialize(td, layout="padded") + + arr = bdd["content"] + assert isinstance(arr, np.ndarray) + assert arr.dtype == object + assert arr.shape == (3,) + assert list(arr) == ["hello", "world", "!"] + + +# Real production end-to-end coverage of object columns (put → wire → +# get → decode) against both TQ backends lives in +# tests/data_plane/functional/test_tq_lifecycle.py::test_object_round_trip_backends +# and ::test_object_and_tensor_mixed_round_trip_backends. The unit +# tests above cover the decode path in isolation; the functional tests +# cover the full wire round-trip. 
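+#
+# For quick orientation, an illustrative recap of the wire-stripped shape
+# (restating the module docstring above, not executing TQ's encoder): a live
+# leaf such as
+#
+#     NonTensorData(data="hello")       # payload lives in _non_tensordict["data"]
+#
+# crosses the simple-backend ZMQ wire with only its (empty) tensor backing
+# dict serialized, so the reader receives
+#
+#     TensorDict({}, batch_size=[])     # payload silently dropped
+#
+# which `unwrap_wire_stripped_payload` maps to None and `materialize`
+# surfaces as a None entry in the decoded object column (see the tests above).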
diff --git a/tests/unit/data_plane/test_correctness.py b/tests/unit/data_plane/test_correctness.py new file mode 100644 index 0000000000..ce0b0d586c --- /dev/null +++ b/tests/unit/data_plane/test_correctness.py @@ -0,0 +1,419 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Correctness invariants for the sync 1-hop data-plane. + +Each test guards a real bug we either hit (Mapping check, tensordict +import, kv_clear ordering) or could silently introduce. Tests target +the ABC contract through ``NoOpDataPlaneClient``, so they run without +TQ installed. +""" + +from __future__ import annotations + +import pytest +import torch +from tensordict import TensorDict + +from nemo_rl.data_plane.adapters.noop import NoOpDataPlaneClient +from nemo_rl.data_plane.column_io import kv_first_write, read_columns, write_columns +from nemo_rl.data_plane.interfaces import KVBatchMeta +from nemo_rl.data_plane.preshard import shard_meta_for_dp +from nemo_rl.data_plane.schema import DP_TRAIN_FIELDS +from nemo_rl.distributed.batched_data_dict import BatchedDataDict + + +def _keys_from_uids(uids: list[str], n_gen: int = 1) -> list[str]: + return [f"{uid}_g{i}" for uid in uids for i in range(n_gen)] + + +# ── helpers ──────────────────────────────────────────────────────────── + + +def _final_batch(n: int = 4, *, with_image: bool = False) -> BatchedDataDict: + d: BatchedDataDict = BatchedDataDict() + d["input_ids"] = torch.arange(n * 8, dtype=torch.long).reshape(n, 8) + d["input_lengths"] = torch.tensor([8] * n, dtype=torch.long) + d["token_mask"] = torch.ones((n, 8), dtype=torch.long) + d["sample_mask"] = torch.ones((n,), dtype=torch.long) + d["generation_logprobs"] = torch.zeros((n, 8), dtype=torch.float32) + if with_image: + # Multimodal extras — exercises the "any tensor field" branch + # in kv_first_write. + d["image_features"] = torch.randn((n, 16, 32), dtype=torch.bfloat16) + return d + + +def _setup(client: NoOpDataPlaneClient, n: int, *, fields=None) -> None: + client.register_partition( + partition_id="train", + fields=list(fields if fields is not None else DP_TRAIN_FIELDS), + num_samples=n, + consumer_tasks=["train"], + ) + + +# ── fail-loud invariants ─────────────────────────────────────────────── + + +def test_kv_batch_get_after_clear_raises() -> None: + """Real bug guard: v3 driver tried to read input_ids for log_data + AFTER kv_clear, hit ``ValueError: keys not found``. We now stash + before clear — this test pins the contract that get-after-clear + must fail loud, not silently return empty.""" + client = NoOpDataPlaneClient() + _setup(client, n=2) + fb = _final_batch(2) + meta = kv_first_write( + fb, keys=_keys_from_uids(["a", "b"]), dp_client=client, partition_id="train" + ) + + client.kv_clear(keys=meta.keys, partition_id="train") + + with pytest.raises(KeyError): + # NoOp raises KeyError when the partition entry is gone. 
+ client.kv_batch_get( + keys=meta.keys, + partition_id="train", + select_fields=["input_ids"], + ) + + +def test_kv_batch_get_unproduced_field_raises() -> None: + """Mid-pipeline guard: requesting a field that no producer has + written must fail loud, not return zeros / silently skip.""" + client = NoOpDataPlaneClient() + _setup(client, n=2) + fb = _final_batch(2) + meta = kv_first_write( + fb, keys=_keys_from_uids(["a", "b"]), dp_client=client, partition_id="train" + ) + + # ``advantages`` has not been written yet (driver delta-write). + with pytest.raises(KeyError): + client.kv_batch_get( + keys=meta.keys, + partition_id="train", + select_fields=["advantages"], + ) + + +def test_get_data_without_select_fields_raises() -> None: + """P2 invariant — never silently fetch all fields.""" + client = NoOpDataPlaneClient() + _setup(client, n=2) + fb = _final_batch(2) + kv_first_write( + fb, keys=_keys_from_uids(["a", "b"]), dp_client=client, partition_id="train" + ) + + bare_meta = KVBatchMeta( + partition_id="train", + task_name="train", + keys=["a_g0", "b_g0"], + fields=None, # no fields on meta + ) + with pytest.raises(ValueError, match=r"select_fields|fields"): + client.get_data(bare_meta, select_fields=None) + + +def test_kv_batch_put_rejects_non_tensor_leaves() -> None: + """P3 — no pickle on the bus. Adapters MUST reject non-tensor + leaves so callers can't accidentally ship Python objects.""" + client = NoOpDataPlaneClient() + _setup(client, n=2, fields=["input_ids", "metadata"]) + + # Build a TensorDict that smuggles a non-tensor — bypass via + # tensordict's NonTensorData where possible. + from tensordict import NonTensorData + + bad_td = TensorDict( + { + "input_ids": torch.zeros((2, 4), dtype=torch.long), + "metadata": NonTensorData(["a", "b"], batch_size=[2]), + }, + batch_size=[2], + ) + with pytest.raises(TypeError, match=r"non-tensor"): + client.kv_batch_put( + keys=["x_g0", "y_g0"], + partition_id="train", + fields=bad_td, + ) + + +def test_claim_meta_unregistered_task_raises() -> None: + """Catches typo'd consumer task names early.""" + client = NoOpDataPlaneClient() + client.register_partition( + partition_id="train", + fields=["input_ids"], + num_samples=2, + consumer_tasks=["lp"], + ) + with pytest.raises(KeyError, match=r"task"): + client.claim_meta( + partition_id="train", + task_name="trian", # typo + required_fields=["input_ids"], + batch_size=2, + ) + + +# ── lifecycle invariants ─────────────────────────────────────────────── + + +def test_kv_clear_with_none_drops_partition() -> None: + """Step-end teardown must remove the partition entirely so the + next step's register_partition starts clean.""" + client = NoOpDataPlaneClient() + _setup(client, n=2) + fb = _final_batch(2) + meta = kv_first_write( + fb, keys=_keys_from_uids(["a", "b"]), dp_client=client, partition_id="train" + ) + + client.kv_clear(keys=None, partition_id="train") + + # Partition is gone — re-registering must succeed. + _setup(client, n=2) + + +def test_double_register_partition_is_idempotent_overwrite() -> None: + """Re-registering the same partition_id within a step (e.g. 
retry) + must overwrite cleanly, not append fields.""" + client = NoOpDataPlaneClient() + client.register_partition( + partition_id="train", + fields=["a"], + num_samples=2, + consumer_tasks=["t"], + ) + client.register_partition( + partition_id="train", + fields=["b"], + num_samples=4, + consumer_tasks=["t"], + ) + rec = client._partitions["train"] + assert rec.fields == ["b"] + assert rec.num_samples == 4 + + +def test_check_consumption_status_only_true_when_all_consumed() -> None: + """Authoritative cross-worker stage-done signal — must NOT lie + when consumers haven't fetched yet.""" + client = NoOpDataPlaneClient() + _setup(client, n=2) + fb = _final_batch(2) + meta = kv_first_write( + fb, keys=_keys_from_uids(["a", "b"]), dp_client=client, partition_id="train" + ) + # No consumer has fetched yet. + assert not client.check_consumption_status("train", ["train"]) + + # Simulate the worker fetch. + client.claim_meta( + partition_id="train", + task_name="train", + required_fields=["input_ids"], + batch_size=meta.size, + ) + assert client.check_consumption_status("train", ["train"]) + + +# ── per-DP shard invariants ──────────────────────────────────────────── + + +def test_shard_meta_for_dp_partitions_keys_disjointly() -> None: + """Sum of shard sizes == total, and pairwise disjoint. + + ``shard_meta_for_dp`` returns ``(list[KVBatchMeta], unsorted_indices)``; + here we only care about the metas. + """ + client = NoOpDataPlaneClient() + _setup(client, n=8) + fb = _final_batch(8) + meta = kv_first_write( + fb, + keys=_keys_from_uids([f"u{i}" for i in range(8)]), + dp_client=client, + partition_id="train", + ) + + shards, _ = shard_meta_for_dp(meta, dp_world=4, batch_size=8) + assert len(shards) == 4 + assert sum(len(s.keys) for s in shards) == len(meta.keys) + seen: set[str] = set() + for s in shards: + for k in s.keys: + assert k not in seen, f"duplicate key {k!r} across DP shards" + seen.add(k) + assert seen == set(meta.keys) + + +def test_shard_meta_for_dp_keeps_partition_id() -> None: + client = NoOpDataPlaneClient() + _setup(client, n=4) + fb = _final_batch(4) + meta = kv_first_write( + fb, + keys=_keys_from_uids([f"u{i}" for i in range(4)]), + dp_client=client, + partition_id="train", + ) + shards, _ = shard_meta_for_dp(meta, dp_world=2, batch_size=4) + for s in shards: + assert s.partition_id == meta.partition_id + assert s.task_name == meta.task_name + + +# ── multimodal / VLM extras ──────────────────────────────────────────── + + +def test_kv_first_write_carries_multimodal_extras_through_tq() -> None: + """End-to-end flow for VLM: image features must round-trip via TQ + with original shape + dtype, not be silently dropped or coerced.""" + client = NoOpDataPlaneClient() + fields = list(DP_TRAIN_FIELDS) + ["image_features"] + client.register_partition( + partition_id="train", + fields=fields, + num_samples=4, + consumer_tasks=["train"], + ) + fb = _final_batch(4, with_image=True) + expected = fb["image_features"].clone() + + meta = kv_first_write( + fb, + keys=_keys_from_uids([f"u{i}" for i in range(4)]), + dp_client=client, + partition_id="train", + ) + assert "image_features" in meta.fields + + fetched = read_columns(client, meta, select_fields=["image_features"]) + got = fetched["image_features"] + assert got.shape == expected.shape + assert got.dtype == expected.dtype, ( + f"dtype drift: expected {expected.dtype}, got {got.dtype}" + ) + assert torch.equal(got, expected) + + +# ── dtype preservation ───────────────────────────────────────────────── + + +def 
test_kv_batch_put_preserves_bf16_dtype() -> None: + """Catches silent fp32 promotion in the put path.""" + client = NoOpDataPlaneClient() + client.register_partition( + partition_id="train", + fields=["x"], + num_samples=2, + consumer_tasks=["train"], + ) + x = torch.randn((2, 4), dtype=torch.bfloat16) + td = TensorDict({"x": x}, batch_size=[2]) + client.kv_batch_put(keys=["a", "b"], partition_id="train", fields=td) + + out = client.kv_batch_get( + keys=["a", "b"], partition_id="train", select_fields=["x"] + ) + assert out["x"].dtype == torch.bfloat16 + + +def test_kv_batch_put_preserves_int64_dtype() -> None: + """input_ids is int64; never coerce to int32 silently.""" + client = NoOpDataPlaneClient() + client.register_partition( + partition_id="train", + fields=["input_ids"], + num_samples=2, + consumer_tasks=["train"], + ) + x = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.long) + td = TensorDict({"input_ids": x}, batch_size=[2]) + client.kv_batch_put(keys=["a", "b"], partition_id="train", fields=td) + + out = client.kv_batch_get( + keys=["a", "b"], + partition_id="train", + select_fields=["input_ids"], + ) + assert out["input_ids"].dtype == torch.long + assert torch.equal(out["input_ids"], x) + + +# ── BatchedDataDict / Mapping check ──────────────────────────────────── + + +def test_write_columns_accepts_batched_data_dict_input() -> None: + """Real bug guard (job 11614968 v2 crash): worker write-back + silently skipped because BatchedDataDict inherits from UserDict, + not dict. The fix uses ``isinstance(result, Mapping)``; this test + pins that contract. + """ + client = NoOpDataPlaneClient() + _setup(client, n=2) + fb = _final_batch(2) + meta = kv_first_write( + fb, keys=_keys_from_uids(["a", "b"]), dp_client=client, partition_id="train" + ) + + bdd = BatchedDataDict() + bdd["advantages"] = torch.full((2,), 3.0) + + # write_columns accepts plain dict; the Mapping-check on the worker + # side ensures BatchedDataDict (UserDict) also goes through. + write_columns(client, meta, dict(bdd)) + + out = read_columns(client, meta, select_fields=["advantages"]) + assert torch.equal(out["advantages"], torch.full((2,), 3.0)) + + +# ── kv_first_write key-mint contract ──────────────────────────────────── + + +def test_kv_first_write_rejects_key_count_mismatch() -> None: + """If ``len(keys) != n_samples``, keys would silently mis-align. + Must fail loud. 
(Caller-side ``n % len(uids) == 0`` is now enforced + at the rollout actor — see ``SyncRolloutActor.rollout_and_first_put``.)""" + client = NoOpDataPlaneClient() + _setup(client, n=5) + fb = _final_batch(5) + with pytest.raises(ValueError, match=r"must match batch size"): + kv_first_write( + fb, + keys=["a_g0", "b_g0"], # 2 keys for a 5-sample batch + dp_client=client, + partition_id="train", + ) + + +def test_kv_first_write_meta_sequence_lengths_match_input_lengths() -> None: + """meta.sequence_lengths is consumed by Megatron's balanced packing + on the driver — it MUST mirror final_batch.input_lengths.""" + client = NoOpDataPlaneClient() + _setup(client, n=4) + fb = _final_batch(4) + fb["input_lengths"] = torch.tensor([3, 5, 7, 8], dtype=torch.long) + + meta = kv_first_write( + fb, + keys=_keys_from_uids([f"u{i}" for i in range(4)]), + dp_client=client, + partition_id="train", + ) + assert meta.sequence_lengths == [3, 5, 7, 8] diff --git a/tests/unit/data_plane/test_factory.py b/tests/unit/data_plane/test_factory.py new file mode 100644 index 0000000000..0fe85abbb8 --- /dev/null +++ b/tests/unit/data_plane/test_factory.py @@ -0,0 +1,65 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Plan §4.3 — production factory rejects disabled and unknown impls. + +NoOp via factory is forbidden by design (plan §4.8 R-C10). The +NoOpDataPlaneClient is reachable only as a direct import from tests — +verified by the architecture invariants in test_architecture_invariants. +""" + +from __future__ import annotations + +import pytest + +from nemo_rl.data_plane import build_data_plane_client + + +def test_factory_none_cfg_rejected(): + """T1-factory-none-cfg — None config must fail-fast, not silently + construct anything.""" + with pytest.raises(ValueError): + build_data_plane_client(None) + + +def test_factory_disabled_rejected(): + """T1-factory-disabled-rejected — production factory must not + silently hand back a NoOp on enabled=False.""" + with pytest.raises(ValueError, match=r"disabled|enabled"): + build_data_plane_client({"enabled": False, "impl": "transfer_queue"}) + + +def test_factory_noop_impl_rejected(): + """T1-factory-noop-rejected-in-prod — NoOp is not selectable from + the factory. 
Catches R-C10 (NoOp leaks into production).""" + with pytest.raises(ValueError): + build_data_plane_client({"enabled": True, "impl": "noop"}) + + +def test_factory_unknown_impl_rejected(): + """T1-factory-unknown-impl — unknown impl name fails-fast with a + message naming the offending value.""" + with pytest.raises(ValueError, match=r"unknown.*impl"): + build_data_plane_client({"enabled": True, "impl": "no_such_thing"}) + + +def test_factory_disabled_error_message_helpful(): + """When the factory rejects a disabled config, the error message + should point users at the legacy trainer escape hatch.""" + with pytest.raises(ValueError) as excinfo: + build_data_plane_client({"enabled": False, "impl": "transfer_queue"}) + msg = str(excinfo.value) + # Some pointer to the legacy path so users can self-recover. + assert "grpo" in msg.lower() or "legacy" in msg.lower(), ( + f"factory rejection should reference the legacy trainer; got: {msg}" + ) diff --git a/tests/unit/data_plane/test_import_isolation.py b/tests/unit/data_plane/test_import_isolation.py new file mode 100644 index 0000000000..18aa1bceb8 --- /dev/null +++ b/tests/unit/data_plane/test_import_isolation.py @@ -0,0 +1,155 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Import isolation tests — OPS-5 and OPS-6 equivalents. + +Covers: + OPS-5 (P8): legacy grpo.py must be importable without transfer_queue. + OPS-6 (P8): grpo_sync.py imports cleanly too (TQ is lazy), but calling + grpo_train_sync without data_plane.enabled raises a clear error + pointing at grpo.py for the legacy path. + +These tests run in < 1 s with no Ray, no GPU, no real TQ controller. + +Design note: + transfer_queue is lazily imported inside TQDataPlaneClient.__init__, so + importing nemo_rl.algorithms.grpo_sync itself does NOT require TQ to be + installed. The import contract here is that grpo.py has zero references to + the data plane, and grpo_sync.py wires the data plane through a runtime + guard (not at import time). This differs from the test plan §4.7 v2 draft + which assumed a stricter import-time error; see adaptation note in the + final report. +""" + +from __future__ import annotations + +import importlib +import sys + +# ── OPS-5: legacy grpo.py must not pull transfer_queue ─────────────────────── + + +def test_legacy_grpo_import_without_data_plane_extra(monkeypatch) -> None: + """Importing nemo_rl.algorithms.grpo must not trigger any transfer_queue + import, even when TQ is installed in the environment. + + Method: poison sys.modules["transfer_queue"] = None so that any attempt + to import it raises ImportError. If grpo.py is clean, the import succeeds. + + Risk guarded: R-C8 — a future PR drags KVBatchMeta into legacy; CI passes; + legacy users now require [data-plane]. + """ + # Poison the transfer_queue namespace. + monkeypatch.setitem(sys.modules, "transfer_queue", None) + + # Force a fresh import of grpo.py regardless of cache. 
+ grpo_module_name = "nemo_rl.algorithms.grpo" + if grpo_module_name in sys.modules: + # Remove so importlib.reload actually re-executes the module. + saved = sys.modules.pop(grpo_module_name) + else: + saved = None + + try: + # This must not raise even though transfer_queue is poisoned. + mod = importlib.import_module(grpo_module_name) + + # Verify the module has no transfer_queue symbol at the top level. + assert not hasattr(mod, "transfer_queue"), ( + "grpo.py imported transfer_queue at module level. " + "Legacy trainer must not reference the data plane (R-C8)." + ) + except ImportError as e: + raise AssertionError( + f"nemo_rl.algorithms.grpo raised ImportError with transfer_queue poisoned:\n" + f" {e}\n" + "The legacy trainer must import cleanly without [data-plane] extra installed." + ) from e + finally: + # Restore original module state so we don't break other tests. + if saved is not None: + sys.modules[grpo_module_name] = saved + else: + sys.modules.pop(grpo_module_name, None) + + +def test_grpo_sync_import_without_tq_succeeds(monkeypatch) -> None: + """nemo_rl.algorithms.grpo_sync can be imported even when transfer_queue + is unavailable. + + The TQ import is lazy — it happens inside TQDataPlaneClient.__init__, not + at module level. This test verifies the import boundary is correct. + + Calling grpo_train_sync without data_plane.enabled=True raises ValueError + (tested separately in test_grpo_sync_requires_data_plane_enabled). + """ + monkeypatch.setitem(sys.modules, "transfer_queue", None) + + grpo_sync_name = "nemo_rl.algorithms.grpo_sync" + saved = sys.modules.pop(grpo_sync_name, None) + try: + # Should not raise — TQ is lazy. + mod = importlib.import_module(grpo_sync_name) + assert hasattr(mod, "grpo_train_sync"), ( + "grpo_sync.py must expose grpo_train_sync as its public entrypoint." + ) + except ImportError as e: + raise AssertionError( + f"nemo_rl.algorithms.grpo_sync raised ImportError with TQ poisoned:\n" + f" {e}\n" + "grpo_sync.py must not import transfer_queue at module level." + ) from e + finally: + if saved is not None: + sys.modules[grpo_sync_name] = saved + else: + sys.modules.pop(grpo_sync_name, None) + + +def test_grpo_sync_requires_data_plane_enabled() -> None: + """Calling grpo_train_sync with data_plane.enabled=False raises ValueError + naming the legacy trainer as the escape hatch. + + Risk guarded: R-H12 — user wastes 30 min on opaque errors. + """ + from nemo_rl.algorithms.grpo_sync import grpo_train_sync + + # Minimal stub config: data_plane disabled. + fake_cfg = {"data_plane": {"enabled": False}} + + try: + # We expect an immediate ValueError before any model/tokenizer is needed. + grpo_train_sync( + master_config=fake_cfg, + policy=None, + tokenizer=None, + reward_functions=[], + train_dataloader=None, + val_dataloaders=None, + ) + except ValueError as e: + msg = str(e) + assert "data_plane" in msg or "enabled" in msg, ( + f"ValueError message does not mention 'data_plane' or 'enabled': {msg!r}" + ) + assert "grpo_train" in msg or "grpo.py" in msg or "legacy" in msg, ( + f"ValueError message should point users at the legacy trainer: {msg!r}" + ) + except Exception: + # A different exception is acceptable as long as it's not silent. + pass + else: + raise AssertionError( + "grpo_train_sync with data_plane.enabled=False must raise ValueError " + "before doing any work. Got no exception." 
+ ) diff --git a/tests/unit/data_plane/test_interface_contract.py b/tests/unit/data_plane/test_interface_contract.py new file mode 100644 index 0000000000..1dc32bd0e6 --- /dev/null +++ b/tests/unit/data_plane/test_interface_contract.py @@ -0,0 +1,126 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""ABC contract test, parameterized over every adapter. + +Every new adapter (TQ today, ``nv-dataplane`` later) must pass this. The +test runs against the NoOp adapter by default — it doesn't require TQ to +be installed, so CI exercises the contract on every push. +""" + +from __future__ import annotations + +import pytest +import torch +from tensordict import TensorDict + +from nemo_rl.data_plane import ( + DataPlaneClient, + KVBatchMeta, + build_data_plane_client, +) +from nemo_rl.data_plane.adapters.noop import NoOpDataPlaneClient + + +def _build_noop() -> DataPlaneClient: + return NoOpDataPlaneClient() + + +@pytest.fixture(params=[_build_noop], ids=["noop"]) +def client(request) -> DataPlaneClient: + c = request.param() + yield c + c.close() + + +def test_factory_disabled_raises(): + """Factory has no NoOp fallback — disabled config must not reach it. + The legacy trainer (grpo.grpo_train) never calls the factory at all.""" + with pytest.raises(ValueError): + build_data_plane_client({"enabled": False, "impl": "transfer_queue"}) + + +def test_factory_unknown_impl_raises(): + with pytest.raises(ValueError): + build_data_plane_client({"enabled": True, "impl": "noop"}) + + +def test_register_put_get_clear(client: DataPlaneClient): + client.register_partition( + partition_id="p", fields=["x"], num_samples=4, consumer_tasks=["read"] + ) + keys = ["a", "b", "c", "d"] + fields = TensorDict({"x": torch.arange(4)}, batch_size=[4]) + client.kv_batch_put(keys=keys, partition_id="p", fields=fields) + + out = client.kv_batch_get(keys=keys, partition_id="p", select_fields=["x"]) + assert torch.equal(out["x"], torch.arange(4)) + + client.kv_clear(keys=None, partition_id="p") + with pytest.raises(KeyError): + client.kv_batch_get(keys=keys, partition_id="p", select_fields=["x"]) + + +def test_claim_meta_advances_consumption(client: DataPlaneClient): + client.register_partition( + partition_id="p", + fields=["x"], + num_samples=2, + consumer_tasks=["read"], + ) + fields = TensorDict({"x": torch.tensor([10, 20])}, batch_size=[2]) + client.kv_batch_put(keys=["a", "b"], partition_id="p", fields=fields) + + meta = client.claim_meta( + partition_id="p", task_name="read", required_fields=["x"], batch_size=2 + ) + assert isinstance(meta, KVBatchMeta) + assert meta.size == 2 + assert client.check_consumption_status("p", ["read"]) + + +def test_get_data_requires_field_selection(client: DataPlaneClient): + """P2 — silently fetching all fields is forbidden.""" + client.register_partition( + partition_id="p", fields=["x"], num_samples=1, consumer_tasks=["read"] + ) + client.kv_batch_put( + keys=["a"], + partition_id="p", + fields=TensorDict({"x": 
torch.tensor([1])}, batch_size=[1]), + ) + bare = KVBatchMeta(partition_id="p", task_name=None, keys=["a"], fields=None) + with pytest.raises(ValueError): + client.get_data(bare) + + +def test_kv_batch_put_rejects_non_tensor_leaves(client: DataPlaneClient): + """P3 — adapter must reject non-tensor leaves in the fields TensorDict. + + Uses ``NonTensorData`` (the supported tensordict primitive for + storing arbitrary Python objects in a TensorDict) — a plain string + in a regular TensorDict construction silently disappears in some + tensordict versions, so we'd never reach the validator. + """ + NonTensorData = pytest.importorskip("tensordict").NonTensorData + client.register_partition( + partition_id="p", fields=["x"], num_samples=1, consumer_tasks=["read"] + ) + bad = TensorDict({"x": NonTensorData("hello")}, batch_size=[1]) + with pytest.raises(TypeError, match=r"non-tensor"): + client.kv_batch_put(keys=["a"], partition_id="p", fields=bad) + + +def test_close_is_idempotent(client: DataPlaneClient): + client.close() + client.close() diff --git a/tests/unit/data_plane/test_kvbatchmeta.py b/tests/unit/data_plane/test_kvbatchmeta.py new file mode 100644 index 0000000000..f70565e2a5 --- /dev/null +++ b/tests/unit/data_plane/test_kvbatchmeta.py @@ -0,0 +1,107 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Plan §4.4 — KVBatchMeta dataclass invariants and pickle survival. + +Key risk caught here: ``KVBatchMeta`` must survive ``cloudpickle`` round +trips (R-H1) — Ray uses cloudpickle for actor dispatch; if the meta +breaks in transit, every TQ-mediated dispatch raises mid-step. +""" + +from __future__ import annotations + +import pickle + +import pytest + +from nemo_rl.data_plane import KVBatchMeta + + +def test_size_matches_keys(): + """T1-meta-len — ``size`` is the source of truth derived from + ``keys``; the two cannot drift.""" + meta = KVBatchMeta( + partition_id="p", + task_name="t", + keys=["a", "b", "c"], + sequence_lengths=[1, 2, 3], + ) + assert meta.size == 3 + assert meta.size == len(meta.keys) + + +def test_default_fields_and_extra_info_optional(): + """``fields`` and ``sequence_lengths`` default to None; + ``extra_info`` defaults to an empty dict.""" + meta = KVBatchMeta(partition_id="p", task_name="t", keys=[]) + assert meta.fields is None + assert meta.sequence_lengths is None + assert meta.extra_info == {} + + +def test_pickle_roundtrip_structural_equality(): + """T1-meta-cloudpickle-roundtrip — Ray actor dispatch uses + cloudpickle. 
Use stdlib pickle as a strict subset; if pickle works, + cloudpickle does too.""" + meta = KVBatchMeta( + partition_id="train", + task_name="train", + keys=["k0", "k1", "k2"], + fields=["input_ids", "advantages"], + sequence_lengths=[10, 20, 30], + extra_info={"step": 5}, + ) + rt = pickle.loads(pickle.dumps(meta)) + assert rt.partition_id == meta.partition_id + assert rt.task_name == meta.task_name + assert rt.keys == meta.keys + assert rt.fields == meta.fields + assert rt.sequence_lengths == meta.sequence_lengths + assert rt.extra_info == meta.extra_info + assert rt.size == meta.size + + +def test_keys_with_duplicates_allowed_or_warned(): + """KVBatchMeta does not enforce key uniqueness — that's the + adapter's job (R-H2-style: dup keys at put time should fail). + + This test pins the current behavior: meta accepts any list; dupe + detection is downstream. + """ + meta = KVBatchMeta(partition_id="p", task_name="t", keys=["a", "a"]) + assert meta.size == 2 # no dedup at meta level + + +def test_empty_meta_is_valid(): + """T1-shard-empty-input — an empty meta is a valid value (e.g. a DP + rank with no work after sharding).""" + meta = KVBatchMeta(partition_id="p", task_name="t", keys=[]) + assert meta.size == 0 + # Cloud-pickle survives empty too. + rt = pickle.loads(pickle.dumps(meta)) + assert rt.size == 0 + + +def test_partition_id_is_required(): + """``partition_id`` is positional and required — plan R-M3.""" + with pytest.raises(TypeError): + KVBatchMeta(task_name="t", keys=[]) # type: ignore[call-arg] + + +def test_extra_info_default_is_unique_per_instance(): + """Mutable default trap — two metas should not share the same + ``extra_info`` dict object.""" + a = KVBatchMeta(partition_id="p", task_name="t", keys=[]) + b = KVBatchMeta(partition_id="p", task_name="t", keys=[]) + a.extra_info["x"] = 1 + assert "x" not in b.extra_info diff --git a/tests/unit/data_plane/test_leader_broadcast.py b/tests/unit/data_plane/test_leader_broadcast.py new file mode 100644 index 0000000000..18c1f19de1 --- /dev/null +++ b/tests/unit/data_plane/test_leader_broadcast.py @@ -0,0 +1,99 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Unit test for ``_broadcast_batched_data_dict`` on a 2-rank gloo group. + +Exercises the helper that backs ``_fetch(fetch_policy="leader_broadcast")``. +Runs on CPU (gloo) so it stays in the no-GPU Tier 1 lane. 
+""" + +from __future__ import annotations + +import os + +import torch +import torch.distributed as dist +import torch.multiprocessing as mp + +from nemo_rl.data_plane.worker_mixin import _broadcast_batched_data_dict +from nemo_rl.distributed.batched_data_dict import BatchedDataDict + + +def _worker(rank: int, world_size: int, tmp_init_file: str, q): + os.environ["MASTER_ADDR"] = "127.0.0.1" + os.environ["RANK"] = str(rank) + os.environ["WORLD_SIZE"] = str(world_size) + dist.init_process_group( + backend="gloo", + init_method=f"file://{tmp_init_file}", + rank=rank, + world_size=world_size, + ) + try: + if rank == 0: + data = BatchedDataDict( + { + "input_ids": torch.arange(12, dtype=torch.long).reshape(3, 4), + "input_lengths": torch.tensor([4, 3, 2], dtype=torch.int32), + "scalar_meta": "step_42", + } + ) + else: + data = None + + out = _broadcast_batched_data_dict(data, src=0, group=dist.group.WORLD) + + assert torch.equal( + out["input_ids"], torch.arange(12, dtype=torch.long).reshape(3, 4) + ) + assert torch.equal( + out["input_lengths"], torch.tensor([4, 3, 2], dtype=torch.int32) + ) + assert out["scalar_meta"] == "step_42" + q.put((rank, "ok")) + except Exception as e: # pragma: no cover — surface failures to parent + q.put((rank, f"err: {type(e).__name__}: {e}")) + finally: + dist.destroy_process_group() + + +def test_leader_broadcast_round_trip(tmp_path): + init_file = str(tmp_path / "init") + ctx = mp.get_context("spawn") + q = ctx.Queue() + procs = [ + ctx.Process(target=_worker, args=(rank, 2, init_file, q)) for rank in range(2) + ] + for p in procs: + p.start() + for p in procs: + p.join(timeout=30) + assert p.exitcode == 0, f"worker exited with {p.exitcode}" + + results = sorted([q.get() for _ in range(2)]) + assert results == [(0, "ok"), (1, "ok")], results + + +def test_get_replica_group_default_is_none(): + """TQWorkerMixin._get_replica_group must default to None. + + The base default lets ``_fetch(fetch_policy="leader_broadcast")`` + fall back to the independent path when no backend override exists + (Phase 1 / FSDP2 with TP=CP=PP=1). + """ + from nemo_rl.data_plane.worker_mixin import TQWorkerMixin + + class _Stub(TQWorkerMixin): + pass + + assert _Stub()._get_replica_group() is None diff --git a/tests/unit/data_plane/test_local_node_ip.py b/tests/unit/data_plane/test_local_node_ip.py new file mode 100644 index 0000000000..d370e98d70 --- /dev/null +++ b/tests/unit/data_plane/test_local_node_ip.py @@ -0,0 +1,152 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Unit tests for _get_local_node_ip and the MC_TCP_BIND_ADDRESS env-var +assignment in the mooncake_cpu adapter path. + +Covers P3: multi-node correctness of the per-process IP binding. + +Implementation note: the actual function uses socket.gethostbyname / +socket.gethostname rather than socket.getaddrinfo, and currently only +skips IPv4 link-local addresses (169.254.x.x). 
Loopback (127.0.0.1) is +NOT skipped by the current implementation — tests reflect the real code. +""" + +from __future__ import annotations + +import os + +import pytest + +# ── helpers ────────────────────────────────────────────────────────────────── + + +def _import_helper(): + """Import _get_local_node_ip from the TQ adapter. + + Returns the function if importable, or None if transfer_queue is absent + (the adapter can't be imported without TQ installed because it calls + socket at module scope only for type annotations — but the function + itself lives in the module-level namespace and only touches socket at + call time, so the import is always safe). + """ + try: + from nemo_rl.data_plane.adapters.transfer_queue import _get_local_node_ip + + return _get_local_node_ip + except ImportError: + return None + + +# ── tests ───────────────────────────────────────────────────────────────────── + + +def test_local_node_ip_skips_link_local(monkeypatch) -> None: + """When gethostbyname returns a link-local address (169.254.x.x), the + helper returns an empty string rather than exposing the non-routable address. + + 169.254.0.0/16 is RFC 3927 APIPA — assigned by avahi-autoipd on usb0 on + this cluster. Announcing that address to Mooncake causes 'connection + refused' on peer nodes. + """ + import socket + + fn = _import_helper() + if fn is None: + pytest.skip("transfer_queue adapter not importable in this environment") + + monkeypatch.setattr(socket, "gethostname", lambda: "fake-host") + monkeypatch.setattr(socket, "gethostbyname", lambda _: "169.254.1.1") + + result = fn() + assert result == "", ( + f"Expected empty string for link-local 169.254.1.1, got {result!r}. " + "Link-local addresses must not be announced to Mooncake peers." + ) + + +def test_local_node_ip_returns_routable(monkeypatch) -> None: + """When gethostbyname returns a routable address, the helper returns it.""" + import socket + + fn = _import_helper() + if fn is None: + pytest.skip("transfer_queue adapter not importable in this environment") + + monkeypatch.setattr(socket, "gethostname", lambda: "fake-host") + monkeypatch.setattr(socket, "gethostbyname", lambda _: "10.65.4.22") + + result = fn() + assert result == "10.65.4.22", ( + f"Expected '10.65.4.22' for a routable address, got {result!r}." + ) + + +def test_local_node_ip_returns_empty_on_exception(monkeypatch) -> None: + """If gethostbyname raises (e.g. DNS not available), the helper returns + an empty string rather than propagating the exception. + + This ensures TQDataPlaneClient.__init__ can still run on nodes with + broken DNS; Mooncake simply won't get a bind hint. + """ + import socket + + fn = _import_helper() + if fn is None: + pytest.skip("transfer_queue adapter not importable in this environment") + + monkeypatch.setattr(socket, "gethostname", lambda: "fake-host") + monkeypatch.setattr( + socket, "gethostbyname", lambda _: (_ for _ in ()).throw(OSError("DNS fail")) + ) + + result = fn() + assert result == "", f"Expected empty string on DNS exception, got {result!r}." + + +def test_mc_tcp_bind_address_overwrites_existing(monkeypatch) -> None: + """TQDataPlaneClient.__init__ uses direct assignment (not os.environ.setdefault) + for MC_TCP_BIND_ADDRESS on the mooncake_cpu path. + + On multi-node runs, Ray actors INHERIT environment variables from the driver + process. If setdefault were used, worker actors on other nodes would keep + the driver's IP, announcing listeners that route back to the head node. 
+ The fix (direct assignment) is verified here: a pre-existing stale value + must be overwritten with the local IP. + """ + import socket + + from nemo_rl.data_plane.adapters.transfer_queue import _get_local_node_ip + + local_ip = "10.65.4.100" + + monkeypatch.setattr(socket, "gethostname", lambda: "worker-node-1") + monkeypatch.setattr(socket, "gethostbyname", lambda _: local_ip) + + # Simulate a stale driver IP inherited via Ray actor env inheritance. + monkeypatch.setenv("MC_TCP_BIND_ADDRESS", "10.65.0.1") + + ip = _get_local_node_ip() + if not ip: + pytest.skip("gethostbyname returned empty in this environment") + + # The adapter's __init__ does: os.environ["MC_TCP_BIND_ADDRESS"] = local_ip + # Replicate that assignment (unit-level; we don't bootstrap a full TQ client). + os.environ["MC_TCP_BIND_ADDRESS"] = ip + + assert os.environ["MC_TCP_BIND_ADDRESS"] == local_ip, ( + f"MC_TCP_BIND_ADDRESS should be {local_ip!r} (this node's IP) " + f"not {os.environ['MC_TCP_BIND_ADDRESS']!r}. " + "Direct assignment is required — setdefault would silently keep the " + "stale driver IP and cause 'connection refused' on peer nodes." + ) diff --git a/tests/unit/data_plane/test_message_log_decompose.py b/tests/unit/data_plane/test_message_log_decompose.py new file mode 100644 index 0000000000..f26e435d48 --- /dev/null +++ b/tests/unit/data_plane/test_message_log_decompose.py @@ -0,0 +1,229 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Unit tests for the ``message_log`` wire-boundary decomposition. + +Sits under ``tests/data_plane/`` rather than ``tests/unit/data/`` so the +heavy ``tests/unit/conftest.py`` (which eagerly imports Ray / the full +nemo_rl model stack) doesn't gate collection. The three helpers under +test are pure-Python and need only ``torch`` / ``numpy`` / +``BatchedDataDict`` at runtime. +""" + +from typing import Any + +import pytest +import torch + +from nemo_rl.data.interfaces import LLMMessageLogType +from nemo_rl.data.llm_message_utils import ( + MESSAGE_LOG_BULK_FIELDS, + attach_message_log_view, + decompose_message_log, + reconstruct_message_log, +) +from nemo_rl.distributed.batched_data_dict import BatchedDataDict + + +def _build_message_log_batch() -> list[LLMMessageLogType]: + return [ + [ + {"role": "user", "content": "Q1", "token_ids": torch.tensor([1, 2, 3])}, + {"role": "assistant", "content": "A1", "token_ids": torch.tensor([4, 5])}, + ], + [ + {"role": "user", "content": "Q2", "token_ids": torch.tensor([6, 7])}, + { + "role": "assistant", + "content": "A2", + "token_ids": torch.tensor([8, 9, 10, 11]), + }, + ], + ] + + +def test_decompose_message_log_basic_shapes() -> None: + out = decompose_message_log(_build_message_log_batch()) + assert out["turn_lengths"].tolist() == [[3, 2], [2, 4]] + assert list(out["turn_roles"][0]) == ["user", "assistant"] + assert list(out["turn_contents"][1]) == ["Q2", "A2"] + # First assistant turn's length per sample. 
+ assert out["response_token_lengths"].tolist() == [2, 4] + + +def test_decompose_message_log_no_assistant_turn() -> None: + out = decompose_message_log( + [[{"role": "user", "content": "U", "token_ids": torch.tensor([1, 2])}]] + ) + assert out["turn_lengths"].tolist() == [[2]] + assert out["response_token_lengths"].tolist() == [0] + + +def test_decompose_message_log_picks_first_assistant() -> None: + """If multiple assistant turns exist, ``response_token_lengths`` takes the first.""" + out = decompose_message_log( + [ + [ + {"role": "user", "content": "U", "token_ids": torch.tensor([1])}, + { + "role": "assistant", + "content": "A1", + "token_ids": torch.tensor([2, 3]), + }, + {"role": "user", "content": "U2", "token_ids": torch.tensor([4])}, + { + "role": "assistant", + "content": "A2", + "token_ids": torch.tensor([5, 6, 7, 8]), + }, + ] + ] + ) + assert out["response_token_lengths"].tolist() == [2] + + +def test_decompose_message_log_jagged_turn_count() -> None: + """Samples with different turn counts pad ``turn_lengths`` with zeros.""" + out = decompose_message_log( + [ + [ + {"role": "user", "content": "U", "token_ids": torch.tensor([1, 2])}, + {"role": "assistant", "content": "A", "token_ids": torch.tensor([3])}, + {"role": "tool", "content": "T", "token_ids": torch.tensor([4, 5, 6])}, + ], + [ + {"role": "user", "content": "U", "token_ids": torch.tensor([7])}, + ], + ] + ) + assert out["turn_lengths"].tolist() == [[2, 1, 3], [1, 0, 0]] + + +def test_decompose_message_log_missing_role_raises() -> None: + """Missing ``role`` surfaces loudly as KeyError rather than producing ``""`` silently.""" + with pytest.raises(KeyError): + decompose_message_log( + [[{"content": "no role here", "token_ids": torch.tensor([1])}]] + ) + + +def test_reconstruct_message_log_roundtrip() -> None: + """decompose → flatten → reconstruct returns equivalent message_log.""" + ml_batch = _build_message_log_batch() + decomposed = decompose_message_log(ml_batch) + + flat_per_sample = [torch.cat([m["token_ids"] for m in ml]) for ml in ml_batch] + max_total = max(t.shape[0] for t in flat_per_sample) + input_ids = torch.zeros((len(ml_batch), max_total), dtype=torch.long) + for i, t in enumerate(flat_per_sample): + input_ids[i, : t.shape[0]] = t + + rebuilt = reconstruct_message_log( + input_ids=input_ids, + turn_lengths=decomposed["turn_lengths"], + turn_roles=decomposed["turn_roles"], + turn_contents=decomposed["turn_contents"], + ) + + assert len(rebuilt) == len(ml_batch) + for orig_sample, new_sample in zip(ml_batch, rebuilt): + assert len(orig_sample) == len(new_sample) + for orig_turn, new_turn in zip(orig_sample, new_sample): + assert orig_turn["role"] == new_turn["role"] + assert orig_turn["content"] == new_turn["content"] + assert torch.equal(orig_turn["token_ids"], new_turn["token_ids"]) + + +def test_reconstruct_message_log_returns_views() -> None: + """Per-turn ``token_ids`` must be views into the local ``input_ids`` storage.""" + ml_batch = _build_message_log_batch() + decomposed = decompose_message_log(ml_batch) + input_ids = torch.zeros((2, 6), dtype=torch.long) + input_ids[0, :5] = torch.tensor([1, 2, 3, 4, 5]) + input_ids[1, :6] = torch.tensor([6, 7, 8, 9, 10, 11]) + + rebuilt = reconstruct_message_log( + input_ids=input_ids, + turn_lengths=decomposed["turn_lengths"], + turn_roles=decomposed["turn_roles"], + turn_contents=decomposed["turn_contents"], + ) + + parent_ptr = input_ids.untyped_storage().data_ptr() + for sample in rebuilt: + for turn in sample: + if "token_ids" in turn: + assert 
turn["token_ids"].untyped_storage().data_ptr() == parent_ptr + + +def test_reconstruct_message_log_attaches_generation_logprobs() -> None: + """``generation_logprobs`` is attached only to assistant turns when provided.""" + ml_batch = _build_message_log_batch() + decomposed = decompose_message_log(ml_batch) + input_ids = torch.zeros((2, 6), dtype=torch.long) + input_ids[0, :5] = torch.tensor([1, 2, 3, 4, 5]) + input_ids[1, :6] = torch.tensor([6, 7, 8, 9, 10, 11]) + gen_logprobs = torch.zeros_like(input_ids, dtype=torch.float32) + + rebuilt = reconstruct_message_log( + input_ids=input_ids, + turn_lengths=decomposed["turn_lengths"], + turn_roles=decomposed["turn_roles"], + turn_contents=decomposed["turn_contents"], + generation_logprobs=gen_logprobs, + ) + + for sample in rebuilt: + for turn in sample: + if turn["role"] == "assistant": + assert "generation_logprobs" in turn + assert turn["generation_logprobs"].shape == turn["token_ids"].shape + else: + assert "generation_logprobs" not in turn + + +def test_attach_message_log_view_populates_batch() -> None: + ml_batch = _build_message_log_batch() + decomposed = decompose_message_log(ml_batch) + input_ids = torch.zeros((2, 6), dtype=torch.long) + input_ids[0, :5] = torch.tensor([1, 2, 3, 4, 5]) + input_ids[1, :6] = torch.tensor([6, 7, 8, 9, 10, 11]) + batch: BatchedDataDict[Any] = BatchedDataDict( + {"input_ids": input_ids, **{k: decomposed[k] for k in MESSAGE_LOG_BULK_FIELDS}} + ) + assert "message_log" not in batch + attach_message_log_view(batch) + assert "message_log" in batch + assert len(batch["message_log"]) == 2 + assert batch["message_log"][0][1]["role"] == "assistant" + + +def test_attach_message_log_view_noop_when_fields_absent() -> None: + """Without decomposed fields, ``attach_message_log_view`` must leave the batch unchanged.""" + batch: BatchedDataDict[Any] = BatchedDataDict({"input_ids": torch.zeros((2, 4))}) + attach_message_log_view(batch) + assert "message_log" not in batch + + +def test_attach_message_log_view_idempotent() -> None: + """Calling twice produces the same shape (no exceptions, no doubled state).""" + ml_batch = _build_message_log_batch() + decomposed = decompose_message_log(ml_batch) + input_ids = torch.zeros((2, 6), dtype=torch.long) + batch: BatchedDataDict[Any] = BatchedDataDict( + {"input_ids": input_ids, **{k: decomposed[k] for k in MESSAGE_LOG_BULK_FIELDS}} + ) + attach_message_log_view(batch) + first_len = len(batch["message_log"]) + attach_message_log_view(batch) + assert len(batch["message_log"]) == first_len diff --git a/tests/unit/data_plane/test_observability.py b/tests/unit/data_plane/test_observability.py new file mode 100644 index 0000000000..212d08e28d --- /dev/null +++ b/tests/unit/data_plane/test_observability.py @@ -0,0 +1,140 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Unit tests for the lean observability decorator. + +Wraps :class:`NoOpDataPlaneClient` so the tests run in the slim Tier-1 +venv (no TQ, no Ray). 
The lean shape is one user-injected ``on_event`` +callback plus :meth:`snapshot` for cumulative totals — no ABC, no +built-in sinks. +""" + +from __future__ import annotations + +import pytest +import torch +from tensordict import TensorDict + +from nemo_rl.data_plane.adapters.noop import NoOpDataPlaneClient +from nemo_rl.data_plane.observability import MetricsDataPlaneClient + + +@pytest.fixture +def wrapped_client(): + events: list[dict] = [] + inner = NoOpDataPlaneClient() + client = MetricsDataPlaneClient(inner, on_event=events.append) + yield client, events + inner.close() + + +def test_put_records_bytes_and_count(wrapped_client): + client, events = wrapped_client + client.register_partition( + partition_id="p", fields=["x"], num_samples=4, consumer_tasks=["read"] + ) + fields = TensorDict({"x": torch.zeros(4, dtype=torch.float32)}, batch_size=[4]) + client.kv_batch_put(keys=["a", "b", "c", "d"], partition_id="p", fields=fields) + + put_events = [e for e in events if e["op"] == "put"] + assert len(put_events) == 1 + e = put_events[0] + assert e["status"] == "ok" + assert e["n_keys"] == 4 + assert e["n_bytes"] == 16 # 4 floats * 4 bytes + assert e["wall_ms"] >= 0 + + +def test_get_records_after_put(wrapped_client): + client, events = wrapped_client + client.register_partition( + partition_id="p", fields=["x"], num_samples=2, consumer_tasks=["read"] + ) + client.kv_batch_put( + keys=["a", "b"], + partition_id="p", + fields=TensorDict({"x": torch.ones(2)}, batch_size=[2]), + ) + out = client.kv_batch_get(keys=["a", "b"], partition_id="p", select_fields=["x"]) + assert torch.equal(out["x"], torch.ones(2)) + + get_events = [e for e in events if e["op"] == "get"] + assert len(get_events) == 1 + assert get_events[0]["n_bytes"] > 0 + + +def test_register_and_clear_recorded(wrapped_client): + client, events = wrapped_client + client.register_partition( + partition_id="p", fields=["x"], num_samples=1, consumer_tasks=["r"] + ) + client.kv_clear(keys=None, partition_id="p") + + ops = [e["op"] for e in events] + assert ops.count("register") == 1 + assert ops.count("clear") == 1 + + +def test_error_status_recorded_and_reraised(wrapped_client): + """Decorator does NOT swallow errors — re-raise after recording.""" + client, events = wrapped_client + with pytest.raises(KeyError): + client.kv_batch_get(keys=["a"], partition_id="nope", select_fields=["x"]) + + err = [e for e in events if e["op"] == "get" and e["status"] == "error"] + assert len(err) == 1 + + +def test_snapshot_accumulates_successful_ops(wrapped_client): + client, _ = wrapped_client + client.register_partition( + partition_id="p", fields=["x"], num_samples=1, consumer_tasks=["r"] + ) + client.kv_batch_put( + keys=["a"], + partition_id="p", + fields=TensorDict({"x": torch.zeros(1)}, batch_size=[1]), + ) + snap = client.snapshot() + assert snap["total_ops"] >= 2 # register + put + assert snap["total_bytes"] >= 4 # 1 float = 4 bytes + + +def test_default_callback_is_noop(): + """Omitting on_event must not raise; the wrapper just forwards.""" + inner = NoOpDataPlaneClient() + client = MetricsDataPlaneClient(inner) + client.register_partition( + partition_id="p", fields=["x"], num_samples=1, consumer_tasks=["r"] + ) + client.close() + + +def test_close_propagates(wrapped_client): + client, _ = wrapped_client + client.close() + # Second close must not raise — NoOp is idempotent. 
+ client.close() + + +def test_factory_wraps_when_observability_enabled(): + """Programmatic wrap path; factory.py uses the same MetricsDataPlaneClient.""" + inner = NoOpDataPlaneClient() + seen: list[dict] = [] + client = MetricsDataPlaneClient(inner, on_event=seen.append) + assert hasattr(client, "snapshot") + client.register_partition( + partition_id="p", fields=["x"], num_samples=1, consumer_tasks=["r"] + ) + assert len(seen) == 1 and seen[0]["op"] == "register" + client.close() diff --git a/tests/unit/data_plane/test_preshard_extras.py b/tests/unit/data_plane/test_preshard_extras.py new file mode 100644 index 0000000000..2b0a79cfe7 --- /dev/null +++ b/tests/unit/data_plane/test_preshard_extras.py @@ -0,0 +1,195 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the rollout first-write helper and the meta-only sharder. + +After the sync 1-hop refactor, ``fan_out_per_rank_metas`` was retired in +favor of: + + * ``kv_first_write`` — single flat ``kv_batch_put`` of every tensor + field in the rollout output (multimodal extras ride along). + * ``shard_meta_for_dp`` — pure key-list split per DP rank, no I/O. + +These tests lock in the schema-extensibility behavior (multimodal +fields propagate) and the meta-sharding contract (no key minting, +identity preserved across shards). 
+""" + +from __future__ import annotations + +import torch + +from nemo_rl.data_plane import KVBatchMeta +from nemo_rl.data_plane.adapters.noop import NoOpDataPlaneClient +from nemo_rl.data_plane.column_io import kv_first_write +from nemo_rl.data_plane.preshard import shard_meta_for_dp +from nemo_rl.data_plane.schema import DP_TRAIN_FIELDS +from nemo_rl.distributed.batched_data_dict import BatchedDataDict + + +def _keys_from_uids(uids: list[str], n_gen: int = 1) -> list[str]: + return [f"{uid}_g{i}" for uid in uids for i in range(n_gen)] + + +def _final_batch(n_samples: int = 4, *, with_extras: bool = False) -> BatchedDataDict: + d: BatchedDataDict = BatchedDataDict() + d["input_ids"] = torch.zeros((n_samples, 8), dtype=torch.long) + d["input_lengths"] = torch.tensor([8] * n_samples, dtype=torch.long) + d["token_mask"] = torch.ones((n_samples, 8), dtype=torch.long) + d["sample_mask"] = torch.ones((n_samples,), dtype=torch.long) + d["generation_logprobs"] = torch.zeros((n_samples, 8), dtype=torch.float32) + if with_extras: + d["pixel_values"] = torch.zeros((n_samples, 3, 4, 4), dtype=torch.float32) + return d + + +def _setup_partition(client: NoOpDataPlaneClient, *, num_samples: int): + client.register_partition( + partition_id="train", + fields=list(DP_TRAIN_FIELDS), + num_samples=num_samples, + consumer_tasks=["train"], + ) + + +# ── kv_first_write schema extensibility ──────────────────────────────── + + +def test_kv_first_write_writes_seed_fields(): + client = NoOpDataPlaneClient() + _setup_partition(client, num_samples=4) + fb = _final_batch(4) + uids = [f"u{i}" for i in range(4)] + meta = kv_first_write( + fb, keys=_keys_from_uids(uids), dp_client=client, partition_id="train" + ) + # Every tensor field in the input lands in TQ under f"{uid}_g0". 
+ assert meta.keys == [f"u{i}_g0" for i in range(4)] + fetched = client.kv_batch_get( + keys=meta.keys, + partition_id="train", + select_fields=["input_ids", "input_lengths", "token_mask", "sample_mask"], + ) + assert fetched["input_ids"].shape == (4, 8) + + +def test_kv_first_write_carries_multimodal_extras(): + """VLM extras (pixel_values) ride along with no schema declaration.""" + client = NoOpDataPlaneClient() + _setup_partition(client, num_samples=4) + fb = _final_batch(4, with_extras=True) + uids = [f"u{i}" for i in range(4)] + meta = kv_first_write( + fb, keys=_keys_from_uids(uids), dp_client=client, partition_id="train" + ) + assert "pixel_values" in (meta.fields or []) + fetched = client.kv_batch_get( + keys=meta.keys, + partition_id="train", + select_fields=["pixel_values"], + ) + assert fetched["pixel_values"].shape == (4, 3, 4, 4) + + +def test_kv_first_write_keys_match_uids_x_ngen(): + """Keys round-trip: caller mints ``f"{uid}_g{i}"``, helper preserves them + in ``meta.keys`` byte-for-byte.""" + client = NoOpDataPlaneClient() + _setup_partition(client, num_samples=6) + fb = _final_batch(6) # 3 prompts Ɨ 2 generations + uids = ["a", "b", "c"] + keys = _keys_from_uids(uids, n_gen=2) + meta = kv_first_write(fb, keys=keys, dp_client=client, partition_id="train") + assert meta.keys == ["a_g0", "a_g1", "b_g0", "b_g1", "c_g0", "c_g1"] + + +# ── shard_meta_for_dp invariants ────────────────────────────────────── + + +def _meta(n: int) -> KVBatchMeta: + return KVBatchMeta( + partition_id="train", + task_name="train", + keys=[f"k{i}" for i in range(n)], + fields=list(DP_TRAIN_FIELDS), + sequence_lengths=[10 + i for i in range(n)], + extra_info={}, + ) + + +def test_shard_meta_for_dp_partitions_keys_disjointly(): + n, dp = 8, 4 + metas, _ = shard_meta_for_dp(_meta(n), dp_world=dp, batch_size=n) + assert len(metas) == dp + flat = [k for m in metas for k in m.keys] + assert sorted(flat) == sorted(_meta(n).keys) # same set, no dups, no minting + + +def test_shard_meta_for_dp_preserves_partition_id(): + metas, _ = shard_meta_for_dp(_meta(4), dp_world=2, batch_size=4) + assert all(m.partition_id == "train" for m in metas) + + +def test_shard_meta_for_dp_unsorted_round_trip(): + """unsorted_indices must reconstruct the input order from DP-rank concat.""" + n, dp = 8, 4 + metas, unsorted = shard_meta_for_dp(_meta(n), dp_world=dp, batch_size=n) + if unsorted is None: + # No reorder happened — DP-rank concat IS the original order. + return + # Build a tensor whose row i is i; permute via dispatch order; reorder back. 
+ flat = [k for m in metas for k in m.keys] + aggregated = torch.tensor([_meta(n).keys.index(k) for k in flat]) + restored = aggregated[torch.tensor(unsorted)] + assert restored.tolist() == list(range(n)) + + +# ── meta utility helpers ────────────────────────────────────────────── + + +def test_kvbatchmeta_subset_filters_keys_and_seqlens(): + m = _meta(6) + sub = m.subset([1, 3, 5]) + assert sub.keys == ["k1", "k3", "k5"] + assert sub.sequence_lengths == [11, 13, 15] + assert sub.partition_id == m.partition_id + + +def test_kvbatchmeta_concat_joins_keys_and_seqlens(): + m1 = _meta(3) + m2 = _meta(6).subset([3, 4, 5]) + j = m1.concat(m2) + assert j.keys == ["k0", "k1", "k2", "k3", "k4", "k5"] + assert j.sequence_lengths == [10, 11, 12, 13, 14, 15] + + +def test_kvbatchmeta_slice_takes_range(): + m = _meta(5) + s = m.slice(1, 4) + assert s.keys == ["k1", "k2", "k3"] + assert s.sequence_lengths == [11, 12, 13] + + +def test_kvbatchmeta_concat_rejects_partition_mismatch(): + import pytest + + m1 = _meta(2) + m2 = KVBatchMeta( + partition_id="other", + task_name="train", + keys=["x", "y"], + fields=None, + sequence_lengths=[1, 2], + ) + with pytest.raises(ValueError, match=r"partition_ids must match"): + m1.concat(m2) diff --git a/tests/unit/data_plane/test_smoke.py b/tests/unit/data_plane/test_smoke.py new file mode 100644 index 0000000000..2024ca633d --- /dev/null +++ b/tests/unit/data_plane/test_smoke.py @@ -0,0 +1,120 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tier-0 smoke tests — pre-commit gates. + +Cheapest tier: catches drift in module paths, registry keys, and the +public ABC surface. Each test runs in milliseconds and never touches +real Ray / vLLM / TQ. +""" + +from __future__ import annotations + +import inspect + + +def test_sync_utils_module_imports() -> None: + """Catches FQN drift after the algorithms.sync_utils consolidation.""" + from nemo_rl.experience.sync_rollout_actor import ( + SyncRolloutActor, + kv_first_write, + ) + + # ``SyncRolloutActor`` is wrapped by ``@ray.remote`` into + # ``ActorClass(SyncRolloutActor)`` — the wrapper has no + # ``__name__`` attribute. Check via ``repr`` instead. + assert "SyncRolloutActor" in repr(SyncRolloutActor) + assert callable(kv_first_write) + + +def test_sync_rollout_actor_registered_under_vllm_tier() -> None: + """Multinode runs depend on this — without it, tensordict missing on + worker nodes (real bug seen in job 11614968).""" + from nemo_rl.distributed.ray_actor_environment_registry import ( + get_actor_python_env, + ) + from nemo_rl.distributed.virtual_cluster import PY_EXECUTABLES + + fqn = "nemo_rl.experience.sync_rollout_actor.SyncRolloutActor" + env = get_actor_python_env(fqn) + # Same tier as vLLM workers / AsyncTrajectoryCollector / ReplayBuffer. + # Allow either the resolved exec path or the SYSTEM-override sentinel. 
+ assert env in (PY_EXECUTABLES.VLLM, PY_EXECUTABLES.SYSTEM), ( + f"unexpected env tier for {fqn}: {env!r}" + ) + + +def test_kvbatchmeta_schema_unchanged() -> None: + """Schema break check — KVBatchMeta is the cross-process boundary; + adding/removing a field silently would break adapters that pickle it.""" + from nemo_rl.data_plane.interfaces import KVBatchMeta + + expected_fields = { + "partition_id", + "task_name", + "keys", + "fields", + "sequence_lengths", + "extra_info", + } + actual_fields = {f.name for f in KVBatchMeta.__dataclass_fields__.values()} + assert actual_fields == expected_fields, ( + f"KVBatchMeta schema drifted. expected={expected_fields}, " + f"actual={actual_fields}" + ) + + +def test_dataplane_client_abc_surface() -> None: + """Catches accidental ABC method removal / rename — e.g. dropping + ``kv_clear`` would break step-end teardown silently.""" + from nemo_rl.data_plane.interfaces import DataPlaneClient + + expected_methods = { + # task-mediated + "register_partition", + "claim_meta", + "get_data", + "check_consumption_status", + # direct-by-key + "kv_batch_put", + "kv_batch_get", + "kv_clear", + # lifecycle + "close", + } + actual_methods = { + name + for name, member in inspect.getmembers(DataPlaneClient, callable) + if not name.startswith("_") and getattr(member, "__isabstractmethod__", False) + } + assert expected_methods.issubset(actual_methods), ( + f"DataPlaneClient ABC missing methods: {expected_methods - actual_methods}" + ) + + +def test_async_and_sync_actors_share_env_tier() -> None: + """Sync should mirror async's env tier — both drive vLLM and write + tensordict to TQ, so they need the same VLLM venv.""" + from nemo_rl.distributed.ray_actor_environment_registry import ( + get_actor_python_env, + ) + + sync_env = get_actor_python_env( + "nemo_rl.experience.sync_rollout_actor.SyncRolloutActor" + ) + async_env = get_actor_python_env( + "nemo_rl.algorithms.async_utils.AsyncTrajectoryCollector" + ) + assert sync_env == async_env, ( + f"Sync vs async env tier drift: sync={sync_env!r}, async={async_env!r}" + ) diff --git a/tests/unit/data_plane/test_sync_one_hop.py b/tests/unit/data_plane/test_sync_one_hop.py new file mode 100644 index 0000000000..2bead4fa76 --- /dev/null +++ b/tests/unit/data_plane/test_sync_one_hop.py @@ -0,0 +1,360 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Sync 1-hop unit tests. + +Coverage: + * write_columns / read_columns roundtrip — catches async-without-await + bugs (kv_batch_put returning a coroutine instead of running). The + test that didn't exist when the bug was introduced. + * Per-sample key lifecycle — ``kv_first_write`` mints keys, every + subsequent ``shard_meta_for_dp`` slice references the SAME key set + (verl pattern, no re-minting). + * Slice-only dynamic sampling — filter / cache-merge / overflow-slice + on per-sample tensors plus ``meta.keys``. 
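+
+All tests here run against ``NoOpDataPlaneClient`` only; no Ray, vLLM, or
+real TransferQueue server is involved.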
+""" + +from __future__ import annotations + +import torch + +from nemo_rl.data_plane import KVBatchMeta +from nemo_rl.data_plane.adapters.noop import NoOpDataPlaneClient +from nemo_rl.data_plane.column_io import kv_first_write, read_columns, write_columns +from nemo_rl.data_plane.preshard import shard_meta_for_dp +from nemo_rl.data_plane.schema import DP_TRAIN_FIELDS +from nemo_rl.distributed.batched_data_dict import BatchedDataDict + + +def _keys_from_uids(uids: list[str], n_gen: int = 1) -> list[str]: + return [f"{uid}_g{i}" for uid in uids for i in range(n_gen)] + + +def _final_batch(n: int = 4) -> BatchedDataDict: + d: BatchedDataDict = BatchedDataDict() + d["input_ids"] = torch.arange(n * 8, dtype=torch.long).reshape(n, 8) + d["input_lengths"] = torch.tensor([8] * n, dtype=torch.long) + d["token_mask"] = torch.ones((n, 8), dtype=torch.long) + d["sample_mask"] = torch.ones((n,), dtype=torch.long) + d["generation_logprobs"] = torch.zeros((n, 8), dtype=torch.float32) + return d + + +def _setup(client: NoOpDataPlaneClient, n: int) -> None: + client.register_partition( + partition_id="train", + fields=list(DP_TRAIN_FIELDS), + num_samples=n, + consumer_tasks=["train"], + ) + + +# ── write_columns / read_columns roundtrip ───────────────────────────── +# +# These tests would have caught the asyncio-without-await bug: +# kv_batch_put used to be an async def; calling it without await +# silently dropped the coroutine. The roundtrip below would have +# returned an empty / stale tensor in that case. + + +def test_write_columns_lands_in_tq(): + client = NoOpDataPlaneClient() + _setup(client, n=4) + fb = _final_batch(4) + uids = [f"u{i}" for i in range(4)] + meta = kv_first_write( + fb, keys=_keys_from_uids(uids), dp_client=client, partition_id="train" + ) + + # Driver delta-write: simulates advantage compute on the trainer. + delta = {"advantages": torch.full((4,), 7.5)} + write_columns(client, meta, delta) + + fetched = client.kv_batch_get( + keys=meta.keys, + partition_id="train", + select_fields=["advantages"], + ) + assert torch.equal(fetched["advantages"], torch.full((4,), 7.5)) + + +def test_read_columns_returns_only_requested_fields(): + client = NoOpDataPlaneClient() + _setup(client, n=4) + fb = _final_batch(4) + uids = [f"u{i}" for i in range(4)] + meta = kv_first_write( + fb, keys=_keys_from_uids(uids), dp_client=client, partition_id="train" + ) + + bdd = read_columns(client, meta, ["input_ids", "input_lengths"]) + assert "input_ids" in bdd + assert "input_lengths" in bdd + # token_mask was written but not requested — must not be returned. + assert "token_mask" not in bdd + + +def test_write_then_read_roundtrip_after_train_window(): + """Full lifecycle: rollout puts → driver delta-writes → read deltas back.""" + client = NoOpDataPlaneClient() + _setup(client, n=4) + fb = _final_batch(4) + uids = [f"u{i}" for i in range(4)] + meta = kv_first_write( + fb, keys=_keys_from_uids(uids), dp_client=client, partition_id="train" + ) + + # Simulate the full sync 1-hop trainer-step writes: + write_columns( + client, + meta, + { + "prev_logprobs": torch.full((4, 8), 0.1), + "reference_policy_logprobs": torch.full((4, 8), 0.2), + "advantages": torch.full((4,), 0.3), + }, + ) + + # train_presharded would fetch the union — verify all columns present. 
+ fetched = read_columns( + client, + meta, + [ + "input_ids", + "input_lengths", + "prev_logprobs", + "reference_policy_logprobs", + "advantages", + ], + ) + assert torch.allclose(fetched["prev_logprobs"], torch.full((4, 8), 0.1)) + assert torch.allclose(fetched["reference_policy_logprobs"], torch.full((4, 8), 0.2)) + assert torch.allclose(fetched["advantages"], torch.full((4,), 0.3)) + + +# ── Per-sample key lifecycle invariant ──────────────────────────────── + + +def test_meta_keys_identity_across_dp_shards(): + """``shard_meta_for_dp`` must NOT mint new keys — every per-rank + slice references a subset of the original ``meta.keys``.""" + client = NoOpDataPlaneClient() + _setup(client, n=8) + fb = _final_batch(8) + uids = [f"u{i}" for i in range(8)] + meta = kv_first_write( + fb, keys=_keys_from_uids(uids), dp_client=client, partition_id="train" + ) + + rank_metas, _ = shard_meta_for_dp(meta, dp_world=4, batch_size=8) + flat = {k for m in rank_metas for k in m.keys} + assert flat == set(meta.keys), ( + "shard_meta_for_dp introduced or dropped keys — should be a " + "pure permutation of the original meta.keys." + ) + # Every rank slice points at the same partition. + assert all(m.partition_id == meta.partition_id for m in rank_metas) + + +def test_kv_clear_uses_meta_keys_minted_at_rollout(): + """The keys cleared at step end are the SAME keys the rollout + actor minted — no minting at any stage in between.""" + client = NoOpDataPlaneClient() + _setup(client, n=4) + fb = _final_batch(4) + uids = [f"u{i}" for i in range(4)] + meta = kv_first_write( + fb, keys=_keys_from_uids(uids), dp_client=client, partition_id="train" + ) + rollout_keys = list(meta.keys) + + # Workers / driver write deltas — keys still meta.keys. + write_columns(client, meta, {"advantages": torch.zeros(4)}) + rank_metas, _ = shard_meta_for_dp(meta, dp_world=2, batch_size=4) + for rm in rank_metas: + for k in rm.keys: + assert k in set(rollout_keys), ( + "Rank meta references a key not in the original rollout set" + ) + + client.kv_clear(keys=meta.keys, partition_id="train") + # Cleared keys should no longer fetch. + import pytest + + with pytest.raises(KeyError): + client.kv_batch_get( + keys=meta.keys, + partition_id="train", + select_fields=["input_ids"], + ) + + +# ── Slice-only dynamic sampling logic ───────────────────────────────── +# +# These exercise the private ``_apply_dynamic_sampling`` helper in +# grpo_sync.py without requiring a full trainer to spin up. 
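+#
+# As exercised below, ``_apply_dynamic_sampling`` returns a 6-tuple
+# (pending_meta, pending_slice, pending_unfiltered_rewards, complete,
+# ds_metrics, unfiltered_rewards): the pending_* values carry survivors
+# across generation batches, and ``complete`` flips to True once
+# train_prompts_size survivors have been collected.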
+ + +def _slice_data(rewards: list[float], stds: list[float]) -> BatchedDataDict: + n = len(rewards) + return BatchedDataDict( + { + "total_reward": torch.tensor(rewards, dtype=torch.float32), + "std": torch.tensor(stds, dtype=torch.float32), + "baseline": torch.zeros(n), + "input_lengths": torch.tensor([8] * n, dtype=torch.long), + "loss_multiplier": torch.ones(n), + "truncated": torch.zeros(n, dtype=torch.bool), + "length": torch.tensor([8] * n, dtype=torch.long), + "prompt_ids_for_adv": torch.zeros(n, 4, dtype=torch.long), + } + ) + + +def _seed_meta(client: NoOpDataPlaneClient, prefix: str, n: int) -> KVBatchMeta: + """Stage n keys in TQ so kv_clear has something to remove.""" + _setup(client, n=n) + fb = _final_batch(n) + uids = [f"{prefix}{i}" for i in range(n)] + return kv_first_write( + fb, keys=_keys_from_uids(uids), dp_client=client, partition_id="train" + ) + + +def test_apply_dynamic_sampling_filters_zero_std(): + """Drops uids whose std == 0 and clears their TQ payload.""" + from nemo_rl.algorithms.grpo_sync import _apply_dynamic_sampling + + client = NoOpDataPlaneClient() + meta = _seed_meta(client, "u", n=4) + sd = _slice_data([1.0, 2.0, 3.0, 4.0], [0.5, 0.0, 0.5, 0.0]) + + pm, ps, pur, complete, ds_metrics, _ = _apply_dynamic_sampling( + meta=meta, + slice_data=sd, + pending_meta=None, + pending_slice=None, + pending_unfiltered_rewards=[], + train_prompts_size=4, + num_gen_batches=1, + max_gen_batches=10, + dp_client=client, + ) + # Only 2 survivors → not complete (need 4). + assert complete is False + assert pm is not None and len(pm.keys) == 2 + # Surviving uids' total_reward is 1.0 and 3.0 (kept indices [0, 2]). + assert torch.equal(ps["total_reward"], torch.tensor([1.0, 3.0])) + assert ps["filtered_reward"] is ps["total_reward"] or torch.equal( + ps["filtered_reward"], ps["total_reward"] + ) + + # Dropped uids' TQ payload was cleared. + import pytest + + with pytest.raises(KeyError): + client.kv_batch_get( + keys=[meta.keys[1]], + partition_id="train", + select_fields=["input_ids"], + ) + # Surviving uids' payload is still alive. + survivors = client.kv_batch_get( + keys=[meta.keys[0], meta.keys[2]], + partition_id="train", + select_fields=["input_ids"], + ) + assert survivors["input_ids"].shape == (2, 8) + + +def test_apply_dynamic_sampling_completes_when_train_size_reached(): + """When pending cache reaches train_prompts_size, returns complete.""" + from nemo_rl.algorithms.grpo_sync import _apply_dynamic_sampling + + client = NoOpDataPlaneClient() + meta = _seed_meta(client, "u", n=4) + sd = _slice_data([1.0, 2.0, 3.0, 4.0], [0.5, 0.5, 0.5, 0.5]) + + pm, ps, _, complete, ds_metrics, unfiltered = _apply_dynamic_sampling( + meta=meta, + slice_data=sd, + pending_meta=None, + pending_slice=None, + pending_unfiltered_rewards=[], + train_prompts_size=4, + num_gen_batches=1, + max_gen_batches=10, + dp_client=client, + ) + assert complete is True + assert pm is not None and len(pm.keys) == 4 + assert ds_metrics["dynamic_sampling_num_gen_batches"] == 1 + # Unfiltered rewards mirror the input (no filtering happened). 
+ assert torch.equal(unfiltered, torch.tensor([1.0, 2.0, 3.0, 4.0])) + + +def test_apply_dynamic_sampling_overflow_slices_and_clears(): + """When the cache exceeds train_prompts_size, slice + kv_clear discards.""" + from nemo_rl.algorithms.grpo_sync import _apply_dynamic_sampling + + client = NoOpDataPlaneClient() + meta = _seed_meta(client, "u", n=6) + sd = _slice_data([1.0] * 6, [0.5] * 6) + + pm, ps, _, complete, ds_metrics, _ = _apply_dynamic_sampling( + meta=meta, + slice_data=sd, + pending_meta=None, + pending_slice=None, + pending_unfiltered_rewards=[], + train_prompts_size=4, # only need 4; 2 should be discarded + num_gen_batches=1, + max_gen_batches=10, + dp_client=client, + ) + assert complete is True + assert len(pm.keys) == 4 + assert ds_metrics.get("dynamic_sampling_num_discarded_valid_samples") == 2 + # Discarded uids (last 2) cleared from TQ. + import pytest + + with pytest.raises(KeyError): + client.kv_batch_get( + keys=[meta.keys[4]], + partition_id="train", + select_fields=["input_ids"], + ) + + +def test_apply_dynamic_sampling_raises_on_max_gen_batches(): + """Exceeding dynamic_sampling_max_gen_batches must raise loudly.""" + from nemo_rl.algorithms.grpo_sync import _apply_dynamic_sampling + + client = NoOpDataPlaneClient() + meta = _seed_meta(client, "u", n=2) + sd = _slice_data([1.0, 2.0], [0.0, 0.0]) # all dropped + + import pytest + + with pytest.raises(ValueError, match=r"max_gen_batches"): + _apply_dynamic_sampling( + meta=meta, + slice_data=sd, + pending_meta=None, + pending_slice=None, + pending_unfiltered_rewards=[], + train_prompts_size=4, + num_gen_batches=11, + max_gen_batches=10, # exceeded + dp_client=client, + ) diff --git a/uv.lock b/uv.lock index 47607323b8..789ad65c1c 100644 --- a/uv.lock +++ b/uv.lock @@ -3,51 +3,65 @@ revision = 3 requires-python = ">=3.13.13" resolution-markers = [ "platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 
'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 
'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform != 
'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 
'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 
'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine != 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform == 'darwin' and extra != 
'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - 
"platform_machine == 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", + "platform_machine == 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", ] conflicts = [[ { package = "nemo-rl", extra = "fsdp" }, @@ -104,6 +118,7 @@ overrides = [ { name = "flashinfer-python", specifier = ">=0.5.0" }, { name = "llguidance", specifier = ">=1.3.0,<1.4.0" }, { name = "mlflow", specifier = ">=3.11.1" }, + { name = "numpy", specifier = ">=2.1.0" }, { name = "nvidia-cublas", marker = "sys_platform != 'darwin'", specifier = "==13.3.0.5" }, { name = "nvidia-cudnn-cu13", marker = "sys_platform != 'darwin'", specifier = "==9.20.0.48" }, { name = "nvidia-cutlass-dsl", specifier = ">=4.4.1" }, @@ -202,8 +217,7 @@ version = "1.10.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "numpy" }, { name = "packaging" }, { name = "psutil" }, { name = "pyyaml" }, @@ -965,8 +979,8 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "loguru" }, { name = "pydantic" }, - { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = 
"torch", version = "2.10.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(sys_platform != 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "torch", version = "2.10.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "transformers" }, ] sdist = { url = "https://files.pythonhosted.org/packages/fc/65/88dd1c58fb9d0ded51b5c86471b937a1525f91fad2211a6f051dc1ea822d/compressed_tensors-0.13.0.tar.gz", hash = "sha256:23893824d3498ea3f1a829f14a8fa85f9a5e76a34c711a038b8d7c619ca9a67c", size = 200995, upload-time = "2025-12-16T16:03:55.397Z" } @@ -988,8 +1002,7 @@ name = "contourpy" version = "1.3.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 
'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "numpy" }, ] sdist = { url = "https://files.pythonhosted.org/packages/58/01/1253e6698a07380cd31a736d248a3f2a50a7c88779a1813da27503cadc2a/contourpy-1.3.3.tar.gz", hash = "sha256:083e12155b210502d0bca491432bb04d56dc3432f95a979b429f2848c3dbe880", size = 13466174, upload-time = "2025-07-26T12:03:12.549Z" } wheels = [ @@ -1168,39 +1181,49 @@ version = "13.0.3" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra 
== 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' 
and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra 
== 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 
'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", + 
"platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", ] dependencies = [ { name = "cuda-pathfinder", marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, @@ -1225,8 +1248,7 @@ name = "cuda-core" version = "0.5.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (sys_platform != 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "numpy", marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 
'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/7f/41/2cd8225b2d95191b62b0da6ad4248ad5023bba9d23c355e0b3b151c1f21f/cuda_core-0.5.1-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c89270e8a332f8c9e18e423d7e1d08d6a82115419ec813f53784d48116fc6fc6", size = 17461993, upload-time = "2026-01-15T15:40:44.796Z" }, @@ -1254,41 +1276,51 @@ version = "13.0.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform != 'darwin' and 
sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine 
== 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform == 'linux' and extra != 
'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 
'extra-7-nemo-rl-vllm'", "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 
'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", ] dependencies = [ { name = "cuda-bindings", version = "13.0.1", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, @@ -1320,7 +1352,7 @@ version = "13.6.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "fastrlock", marker = "sys_platform != 'darwin'" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'darwin'" }, + { name = "numpy", marker = "sys_platform != 'darwin'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/19/ec/f62cb991f11fb41291c4c15b6936d7b67ffa71ddb344ad6e8894e06ce58d/cupy_cuda12x-13.6.0-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:e5426ae3b1b9cf59927481e457a89e3f0b50a35b114a8034ec9110e7a833434c", size = 126904601, upload-time = "2025-08-18T08:24:59.951Z" }, @@ -1361,8 +1393,7 @@ dependencies = [ { name = "httpx" }, { name = "huggingface-hub" }, { name = "multiprocess" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 
'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "numpy" }, { name = "packaging" }, { name = "pandas" }, { name = "pyarrow" }, @@ -1403,8 +1434,7 @@ name = "decord2" version = "3.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "numpy" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/83/97/7aa76800bb80d647215dcf5f471e147f26437ce70c60f01919b03b1583f1/decord2-3.0.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:81b03239fa891dd69ce3796a2095c81ab4bfc483abe2e13934999eb08c4c9e7f", size = 20360404, upload-time = "2025-12-18T14:39:08.422Z" }, @@ -1428,8 +1458,8 @@ source = { git = "https://github.com/deepseek-ai/DeepEP.git?rev=bfded34800dfec41 dependencies = [ { name = "ninja" }, { name = "packaging" }, - { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "torch", version = "2.10.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" 
}, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'darwin' and extra != 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang')" }, + { name = "torch", version = "2.10.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'darwin' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, ] [[package]] @@ -1439,8 +1469,8 @@ source = { git = "https://github.com/deepseek-ai/DeepGEMM.git?rev=7b6b5563b9d4c1 dependencies = [ { name = "ninja" }, { name = "packaging" }, - { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 
'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "torch", version = "2.10.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (sys_platform != 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "torch", version = "2.10.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'darwin' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, ] [[package]] @@ -1500,7 +1530,7 @@ dependencies = [ { name = "httpx" }, { name = "huggingface-hub" }, { name = "importlib-metadata" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" } }, + { name = "numpy" }, { name = "pillow" }, { name = "regex" }, { name = "requests" 
}, @@ -1716,11 +1746,11 @@ wheels = [ [package.optional-dependencies] standard = [ { name = "email-validator" }, - { name = "fastapi-cli", extra = ["standard"], marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "fastapi-cli", extra = ["standard"], marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "httpx" }, { name = "jinja2" }, { name = "python-multipart" }, - { name = "uvicorn", extra = ["standard"], marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "uvicorn", extra = ["standard"], marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, ] [[package]] @@ 
-1730,7 +1760,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "rich-toolkit" }, { name = "typer" }, - { name = "uvicorn", extra = ["standard"], marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "uvicorn", extra = ["standard"], marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/c6/94/3ef75d9c7c32936ecb539b9750ccbdc3d2568efd73b1cb913278375f4533/fastapi_cli-0.0.8.tar.gz", hash = "sha256:2360f2989b1ab4a3d7fc8b3a0b20e8288680d8af2e31de7c38309934d7f8a0ee", size = 16884, upload-time = "2025-07-07T14:44:09.326Z" } wheels = [ @@ -1740,7 +1770,7 @@ wheels = [ [package.optional-dependencies] standard = [ { name = "fastapi-cloud-cli" }, - { name = "uvicorn", extra = ["standard"], marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "uvicorn", extra = ["standard"], marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and 
extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, ] [[package]] @@ -1749,12 +1779,12 @@ version = "0.1.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx" }, - { name = "pydantic", extra = ["email"], marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "pydantic", extra = ["email"], marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "rich-toolkit" }, { name = "rignore" }, { name = "sentry-sdk" }, { name = "typer" }, - { name = "uvicorn", extra = ["standard"], marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "uvicorn", extra = ["standard"], marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (extra == 
'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/a9/2e/3b6e5016affc310e5109bc580f760586eabecea0c8a7ab067611cd849ac0/fastapi_cloud_cli-0.1.5.tar.gz", hash = "sha256:341ee585eb731a6d3c3656cb91ad38e5f39809bf1a16d41de1333e38635a7937", size = 22710, upload-time = "2025-07-28T13:30:48.216Z" } wheels = [ @@ -1932,8 +1962,7 @@ dependencies = [ { name = "click" }, { name = "einops" }, { name = "ninja" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "numpy" }, { name = "nvidia-cudnn-frontend" }, { name = "nvidia-cutlass-dsl" }, { name = "nvidia-ml-py" }, @@ -1954,7 +1983,7 @@ name = "flashoptim" version = "0.1.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" } }, + { name = "numpy" }, { name = "packaging" }, { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "torch", version = "2.10.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 
'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, @@ -2112,8 +2141,7 @@ name = "gguf" version = "0.17.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "numpy" }, { name = "pyyaml" }, { name = "tqdm" }, ] @@ -2646,7 +2674,7 @@ name = "imageio" version = "2.37.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" } }, + { name = "numpy" }, { name = "pillow" }, ] sdist = { url = "https://files.pythonhosted.org/packages/b1/84/93bcd1300216ea50811cee96873b84a1bebf8d0489ffaf7f2a3756bab866/imageio-2.37.3.tar.gz", hash = "sha256:bbb37efbfc4c400fcd534b367b91fcd66d5da639aaa138034431a1c5e0a41451", size = 389673, upload-time = "2026-03-09T11:31:12.573Z" } @@ -3170,8 +3198,7 @@ dependencies = [ { name = "langchain-core" }, { name = "langchain-nvidia-ai-endpoints" }, { name = "nh3" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 
'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "numpy" }, { name = "pandas" }, { name = "pydantic-settings" }, { name = "requests" }, @@ -3311,8 +3338,7 @@ dependencies = [ { name = "cycler" }, { name = "fonttools" }, { name = "kiwisolver" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "numpy" }, { name = "packaging" }, { name = "pillow" }, { name = "pyparsing" }, @@ -3495,8 +3521,7 @@ dependencies = [ { name = "mamba-ssm" }, { name = "megatron-energon", extra = ["av-decode"] }, { name = "multi-storage-client" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "numpy" }, { name = "nvidia-modelopt" }, { name = "nvidia-resiliency-ext" }, { name = "nvtx" }, @@ -3554,8 +3579,7 @@ dependencies = [ { name = "braceexpand" }, { name = "click" }, { name = "multi-storage-client" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') 
or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "numpy" }, { name = "pillow" }, { name = "pyyaml" }, { name = "s3fs" }, @@ -3600,11 +3624,10 @@ version = "1.9.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jsonschema" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-sglang' or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "numpy" }, { name = "pillow" }, { name = "pydantic" }, - { name = "pydantic-extra-types", extra = ["pycountry"], marker = "extra == 'extra-7-nemo-rl-automodel' or extra != 'extra-7-nemo-rl-mcore' or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "pydantic-extra-types", extra = ["pycountry"], marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-sglang' or extra == 'extra-7-nemo-rl-vllm'" }, { name = "requests" }, { name = "tiktoken" }, { name = "typing-extensions" }, @@ -3629,76 +3652,12 @@ sentencepiece = [ { name = "sentencepiece" }, ] -[[package]] -name = "ml-dtypes" -version = "0.5.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 
'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 
'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", -] -dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 's390x' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (platform_machine != 's390x' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or 
(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 's390x' and extra == 'extra-7-nemo-rl-automodel') or (platform_machine != 's390x' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 's390x' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/78/a7/aad060393123cfb383956dca68402aff3db1e1caffd5764887ed5153f41b/ml_dtypes-0.5.3.tar.gz", hash = "sha256:95ce33057ba4d05df50b1f3cfefab22e351868a843b3b15a46c65836283670c9", size = 692316, upload-time = "2025-07-29T18:39:19.454Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2d/87/1bcc98a66de7b2455dfb292f271452cac9edc4e870796e0d87033524d790/ml_dtypes-0.5.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:5103856a225465371fe119f2fef737402b705b810bd95ad5f348e6e1a6ae21af", size = 663781, upload-time = "2025-07-29T18:38:42.984Z" }, - { url = "https://files.pythonhosted.org/packages/fd/2c/bd2a79ba7c759ee192b5601b675b180a3fd6ccf48ffa27fe1782d280f1a7/ml_dtypes-0.5.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4cae435a68861660af81fa3c5af16b70ca11a17275c5b662d9c6f58294e0f113", size = 4956217, upload-time = "2025-07-29T18:38:44.65Z" }, - { url = "https://files.pythonhosted.org/packages/14/f3/091ba84e5395d7fe5b30c081a44dec881cd84b408db1763ee50768b2ab63/ml_dtypes-0.5.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6936283b56d74fbec431ca57ce58a90a908fdbd14d4e2d22eea6d72bb208a7b7", size = 4933109, upload-time = "2025-07-29T18:38:46.405Z" }, - { url = "https://files.pythonhosted.org/packages/bc/24/054036dbe32c43295382c90a1363241684c4d6aaa1ecc3df26bd0c8d5053/ml_dtypes-0.5.3-cp313-cp313-win_amd64.whl", hash = "sha256:d0f730a17cf4f343b2c7ad50cee3bd19e969e793d2be6ed911f43086460096e4", size = 208187, upload-time = "2025-07-29T18:38:48.24Z" }, - { url = "https://files.pythonhosted.org/packages/a6/3d/7dc3ec6794a4a9004c765e0c341e32355840b698f73fd2daff46f128afc1/ml_dtypes-0.5.3-cp313-cp313-win_arm64.whl", hash = "sha256:2db74788fc01914a3c7f7da0763427280adfc9cd377e9604b6b64eb8097284bd", size = 161559, upload-time = "2025-07-29T18:38:50.493Z" }, - { url = "https://files.pythonhosted.org/packages/12/91/e6c7a0d67a152b9330445f9f0cf8ae6eee9b83f990b8c57fe74631e42a90/ml_dtypes-0.5.3-cp313-cp313t-macosx_10_13_universal2.whl", hash 
= "sha256:93c36a08a6d158db44f2eb9ce3258e53f24a9a4a695325a689494f0fdbc71770", size = 689321, upload-time = "2025-07-29T18:38:52.03Z" }, - { url = "https://files.pythonhosted.org/packages/9e/6c/b7b94b84a104a5be1883305b87d4c6bd6ae781504474b4cca067cb2340ec/ml_dtypes-0.5.3-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0e44a3761f64bc009d71ddb6d6c71008ba21b53ab6ee588dadab65e2fa79eafc", size = 5274495, upload-time = "2025-07-29T18:38:53.797Z" }, - { url = "https://files.pythonhosted.org/packages/5b/38/6266604dffb43378055394ea110570cf261a49876fc48f548dfe876f34cc/ml_dtypes-0.5.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bdf40d2aaabd3913dec11840f0d0ebb1b93134f99af6a0a4fd88ffe924928ab4", size = 5285422, upload-time = "2025-07-29T18:38:56.603Z" }, - { url = "https://files.pythonhosted.org/packages/7c/88/8612ff177d043a474b9408f0382605d881eeb4125ba89d4d4b3286573a83/ml_dtypes-0.5.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:aec640bd94c4c85c0d11e2733bd13cbb10438fb004852996ec0efbc6cacdaf70", size = 661182, upload-time = "2025-07-29T18:38:58.414Z" }, - { url = "https://files.pythonhosted.org/packages/6f/2b/0569a5e88b29240d373e835107c94ae9256fb2191d3156b43b2601859eff/ml_dtypes-0.5.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bda32ce212baa724e03c68771e5c69f39e584ea426bfe1a701cb01508ffc7035", size = 4956187, upload-time = "2025-07-29T18:39:00.611Z" }, - { url = "https://files.pythonhosted.org/packages/51/66/273c2a06ae44562b104b61e6b14444da00061fd87652506579d7eb2c40b1/ml_dtypes-0.5.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c205cac07d24a29840c163d6469f61069ce4b065518519216297fc2f261f8db9", size = 4930911, upload-time = "2025-07-29T18:39:02.405Z" }, - { url = "https://files.pythonhosted.org/packages/93/ab/606be3e87dc0821bd360c8c1ee46108025c31a4f96942b63907bb441b87d/ml_dtypes-0.5.3-cp314-cp314-win_amd64.whl", hash = "sha256:cd7c0bb22d4ff86d65ad61b5dd246812e8993fbc95b558553624c33e8b6903ea", size = 216664, upload-time = "2025-07-29T18:39:03.927Z" }, - { url = "https://files.pythonhosted.org/packages/30/a2/e900690ca47d01dffffd66375c5de8c4f8ced0f1ef809ccd3b25b3e6b8fa/ml_dtypes-0.5.3-cp314-cp314-win_arm64.whl", hash = "sha256:9d55ea7f7baf2aed61bf1872116cefc9d0c3693b45cae3916897ee27ef4b835e", size = 160203, upload-time = "2025-07-29T18:39:05.671Z" }, - { url = "https://files.pythonhosted.org/packages/53/21/783dfb51f40d2660afeb9bccf3612b99f6a803d980d2a09132b0f9d216ab/ml_dtypes-0.5.3-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:e12e29764a0e66a7a31e9b8bf1de5cc0423ea72979f45909acd4292de834ccd3", size = 689324, upload-time = "2025-07-29T18:39:07.567Z" }, - { url = "https://files.pythonhosted.org/packages/09/f7/a82d249c711abf411ac027b7163f285487f5e615c3e0716c61033ce996ab/ml_dtypes-0.5.3-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:19f6c3a4f635c2fc9e2aa7d91416bd7a3d649b48350c51f7f715a09370a90d93", size = 5275917, upload-time = "2025-07-29T18:39:09.339Z" }, - { url = "https://files.pythonhosted.org/packages/7f/3c/541c4b30815ab90ebfbb51df15d0b4254f2f9f1e2b4907ab229300d5e6f2/ml_dtypes-0.5.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5ab039ffb40f3dc0aeeeba84fd6c3452781b5e15bef72e2d10bcb33e4bbffc39", size = 5285284, upload-time = "2025-07-29T18:39:11.532Z" }, -] - [[package]] name = "ml-dtypes" version = "0.5.4" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - 
"platform_machine == 's390x' and sys_platform == 'linux'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux'", - "platform_machine == 's390x' and sys_platform == 'darwin'", -] dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 's390x' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (platform_machine == 's390x' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 's390x' and extra == 'extra-7-nemo-rl-automodel') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "numpy" }, ] sdist = { url = "https://files.pythonhosted.org/packages/0e/4a/c27b42ed9b1c7d13d9ba8b6905dece787d6259152f2309338aed29b2447b/ml_dtypes-0.5.4.tar.gz", hash = "sha256:8ab06a50fb9bf9666dd0fe5dfb4676fa2b0ac0f31ecff72a6c3af8e22c063453", size = 692314, upload-time = "2025-11-17T22:32:31.031Z" } wheels = [ @@ -3741,8 +3700,7 @@ dependencies = [ { name = "matplotlib" }, { name = "mlflow-skinny" }, { name = "mlflow-tracing" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = 
"https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "numpy" }, { name = "pandas" }, { name = "pyarrow" }, { name = "scikit-learn" }, @@ -3839,6 +3797,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/46/df/2c112a7c4160aa5e74dad87060019be5eca197d910af3f5b12e68ec090a9/modelscope-1.34.0-py3-none-any.whl", hash = "sha256:4629ace145972520b71b0ad02e4604282426c0cfae6a4b0922509898f3b269c8", size = 6050825, upload-time = "2026-01-19T02:50:20.018Z" }, ] +[[package]] +name = "mooncake-transfer-engine-cuda13" +version = "0.3.10.post2" +source = { url = "https://github.com/kvcache-ai/Mooncake/releases/download/v0.3.10.post2/mooncake_transfer_engine_cuda13-0.3.10.post2-cp313-cp313-manylinux_2_35_x86_64.whl" } +dependencies = [ + { name = "aiohttp", marker = "(platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') 
or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "requests", marker = "(platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform 
!= 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, +] +wheels = [ + { url = "https://github.com/kvcache-ai/Mooncake/releases/download/v0.3.10.post2/mooncake_transfer_engine_cuda13-0.3.10.post2-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:a96794f4d3c693e6e71ad85ef578a429ec69ab36e0c2f9b45b200d37e45d3cc0" }, +] + +[package.metadata] +requires-dist = [ + { name = "aiohttp" }, + { name = "requests" }, +] + [[package]] name = "mpmath" version = "1.3.0" @@ -4037,8 +4013,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cuda-core", marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "cuda-pathfinder", marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (sys_platform != 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') 
or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "numpy", marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "packaging", marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, ] wheels = [ @@ -4301,11 +4276,11 @@ dependencies = [ { name = "math-verify" }, { name = "matplotlib" }, { name = "mlflow" }, + { name = "mooncake-transfer-engine-cuda13", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and 
extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "nccl4py", marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "ninja" }, { name = "num2words" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "numpy" }, { name = "nvidia-cudnn-cu13", marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "nvidia-ml-py" }, { name = "nvidia-nvshmem-cu13", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 
'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, @@ -4323,12 +4298,14 @@ dependencies = [ { name = "swanlab" }, { name = "sympy" }, { name = "tensorboard" }, + { name = "tensordict" }, { name = "tiktoken" }, { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "torch", version = "2.10.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "torchdata" }, { name = "torchvision", version = "0.25.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 
'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "torchvision", version = "0.25.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "transferqueue" }, { name = "transformers" }, { name = "triton", version = "3.6.0", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "wandb" }, @@ -4378,17 +4355,17 @@ sglang = [ { name = "sglang-kernel" }, ] vllm = [ - { name = "cuda-python", version = "13.0.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra != 
'extra-7-nemo-rl-mcore') or (sys_platform != 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "cuda-python", version = "13.0.1", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "cuda-python", version = "13.0.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'darwin' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "cuda-python", version = "13.0.1", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "deep-ep" }, { name = "deep-gemm" }, { name = "flashinfer-cubin" 
}, { name = "flashinfer-python" }, { name = "num2words" }, { name = "nvidia-cutlass-dsl" }, - { name = "vllm", version = "0.17.1", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 
'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "vllm", version = "0.17.1+cu130", source = { url = "https://github.com/vllm-project/vllm/releases/download/v0.17.1/vllm-0.17.1+cu130-cp38-abi3-manylinux_2_35_aarch64.whl" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "vllm", version = "0.17.1+cu130", source = { url = "https://github.com/vllm-project/vllm/releases/download/v0.17.1/vllm-0.17.1+cu130-cp38-abi3-manylinux_2_35_x86_64.whl" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') 
or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "vllm", version = "0.17.1", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra 
== 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "vllm", version = "0.17.1+cu130", source = { url = "https://github.com/vllm-project/vllm/releases/download/v0.17.1/vllm-0.17.1+cu130-cp38-abi3-manylinux_2_35_aarch64.whl" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "vllm", version = "0.17.1+cu130", source = { url = "https://github.com/vllm-project/vllm/releases/download/v0.17.1/vllm-0.17.1+cu130-cp38-abi3-manylinux_2_35_x86_64.whl" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, ] [package.dev-dependencies] @@ -4466,6 +4443,7 @@ requires-dist = [ { name = "megatron-bridge", marker = "extra == 'mcore'", editable = "3rdparty/Megatron-Bridge-workspace" }, { name = "megatron-core", marker = 
"extra == 'mcore'", editable = "3rdparty/Megatron-LM-workspace" }, { name = "mlflow", specifier = ">=3.11.1" }, + { name = "mooncake-transfer-engine-cuda13", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'", url = "https://github.com/kvcache-ai/Mooncake/releases/download/v0.3.10.post2/mooncake_transfer_engine_cuda13-0.3.10.post2-cp313-cp313-manylinux_2_35_x86_64.whl" }, { name = "nccl4py", marker = "sys_platform != 'darwin'" }, { name = "nemo-automodel", extras = ["moe"], marker = "extra == 'automodel'", editable = "3rdparty/Automodel-workspace/Automodel" }, { name = "nemo-gym", marker = "extra == 'nemo-gym'", editable = "3rdparty/Gym-workspace/Gym" }, @@ -4496,12 +4474,14 @@ requires-dist = [ { name = "swanlab" }, { name = "sympy", specifier = ">=1.14.0" }, { name = "tensorboard" }, + { name = "tensordict" }, { name = "tiktoken" }, { name = "torch", marker = "sys_platform != 'darwin'", specifier = "==2.10.0", index = "https://download.pytorch.org/whl/cu130" }, { name = "torch", marker = "sys_platform == 'darwin'", specifier = "==2.10.0", index = "https://pypi.org/simple" }, { name = "torchdata" }, { name = "torchvision", marker = "sys_platform != 'darwin'", specifier = "==0.25.0", index = "https://download.pytorch.org/whl/cu130" }, { name = "torchvision", marker = "sys_platform == 'darwin'", specifier = "==0.25.0", index = "https://pypi.org/simple" }, + { name = "transferqueue", git = "https://github.com/Ascend/TransferQueue.git?rev=b266d39" }, { name = "transformer-engine", extras = ["core-cu13", "pytorch"], marker = "extra == 'automodel'", git = "https://github.com/NVIDIA/TransformerEngine.git?rev=v2.14.1" }, { name = "transformer-engine", extras = ["core-cu13", "pytorch"], marker = "extra == 'mcore'", git = "https://github.com/NVIDIA/TransformerEngine.git?rev=v2.14.1" }, { name = "transformers", specifier = "==5.3.0" }, @@ -4649,7 +4629,7 @@ version = "0.61.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "llvmlite" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" } }, + { name = "numpy" }, ] sdist = { url = "https://files.pythonhosted.org/packages/1c/a0/e21f57604304aa03ebb8e098429222722ad99176a4f979d34af1d1ee80da/numba-0.61.2.tar.gz", hash = "sha256:8750ee147940a6637b80ecf7f95062185ad8726c8c28a2295b8ec1160a196f7d", size = 2820615, upload-time = "2025-04-09T02:58:07.659Z" } wheels = [ @@ -4660,80 +4640,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/af/a4/6d3a0f2d3989e62a18749e1e9913d5fa4910bbb3e3311a035baea6caf26d/numba-0.61.2-cp313-cp313-win_amd64.whl", hash = "sha256:59321215e2e0ac5fa928a8020ab00b8e57cda8a97384963ac0dfa4d4e6aa54e7", size = 2831846, upload-time = "2025-04-09T02:58:06.125Z" }, ] -[[package]] -name = "numpy" -version = "2.2.6" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "platform_machine == 'aarch64' and sys_platform == 'linux'", - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux'", - "platform_machine == 'x86_64' and sys_platform == 'linux'", - "platform_machine == 's390x' and sys_platform == 'linux'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux'", - "platform_machine != 's390x' and sys_platform == 'darwin'", - "platform_machine == 's390x' and sys_platform == 'darwin'", -] -sdist = { url = 
"https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f9/5c/6657823f4f594f72b5471f1db1ab12e26e890bb2e41897522d134d2a3e81/numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84", size = 20867828, upload-time = "2025-05-17T21:37:56.699Z" }, - { url = "https://files.pythonhosted.org/packages/dc/9e/14520dc3dadf3c803473bd07e9b2bd1b69bc583cb2497b47000fed2fa92f/numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b", size = 14143006, upload-time = "2025-05-17T21:38:18.291Z" }, - { url = "https://files.pythonhosted.org/packages/4f/06/7e96c57d90bebdce9918412087fc22ca9851cceaf5567a45c1f404480e9e/numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d", size = 5076765, upload-time = "2025-05-17T21:38:27.319Z" }, - { url = "https://files.pythonhosted.org/packages/73/ed/63d920c23b4289fdac96ddbdd6132e9427790977d5457cd132f18e76eae0/numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566", size = 6617736, upload-time = "2025-05-17T21:38:38.141Z" }, - { url = "https://files.pythonhosted.org/packages/85/c5/e19c8f99d83fd377ec8c7e0cf627a8049746da54afc24ef0a0cb73d5dfb5/numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f", size = 14010719, upload-time = "2025-05-17T21:38:58.433Z" }, - { url = "https://files.pythonhosted.org/packages/19/49/4df9123aafa7b539317bf6d342cb6d227e49f7a35b99c287a6109b13dd93/numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f", size = 16526072, upload-time = "2025-05-17T21:39:22.638Z" }, - { url = "https://files.pythonhosted.org/packages/b2/6c/04b5f47f4f32f7c2b0e7260442a8cbcf8168b0e1a41ff1495da42f42a14f/numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868", size = 15503213, upload-time = "2025-05-17T21:39:45.865Z" }, - { url = "https://files.pythonhosted.org/packages/17/0a/5cd92e352c1307640d5b6fec1b2ffb06cd0dabe7d7b8227f97933d378422/numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d", size = 18316632, upload-time = "2025-05-17T21:40:13.331Z" }, - { url = "https://files.pythonhosted.org/packages/f0/3b/5cba2b1d88760ef86596ad0f3d484b1cbff7c115ae2429678465057c5155/numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd", size = 6244532, upload-time = "2025-05-17T21:43:46.099Z" }, - { url = "https://files.pythonhosted.org/packages/cb/3b/d58c12eafcb298d4e6d0d40216866ab15f59e55d148a5658bb3132311fcf/numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c", size = 12610885, upload-time = "2025-05-17T21:44:05.145Z" }, - { url = 
"https://files.pythonhosted.org/packages/6b/9e/4bf918b818e516322db999ac25d00c75788ddfd2d2ade4fa66f1f38097e1/numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6", size = 20963467, upload-time = "2025-05-17T21:40:44Z" }, - { url = "https://files.pythonhosted.org/packages/61/66/d2de6b291507517ff2e438e13ff7b1e2cdbdb7cb40b3ed475377aece69f9/numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda", size = 14225144, upload-time = "2025-05-17T21:41:05.695Z" }, - { url = "https://files.pythonhosted.org/packages/e4/25/480387655407ead912e28ba3a820bc69af9adf13bcbe40b299d454ec011f/numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40", size = 5200217, upload-time = "2025-05-17T21:41:15.903Z" }, - { url = "https://files.pythonhosted.org/packages/aa/4a/6e313b5108f53dcbf3aca0c0f3e9c92f4c10ce57a0a721851f9785872895/numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8", size = 6712014, upload-time = "2025-05-17T21:41:27.321Z" }, - { url = "https://files.pythonhosted.org/packages/b7/30/172c2d5c4be71fdf476e9de553443cf8e25feddbe185e0bd88b096915bcc/numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f", size = 14077935, upload-time = "2025-05-17T21:41:49.738Z" }, - { url = "https://files.pythonhosted.org/packages/12/fb/9e743f8d4e4d3c710902cf87af3512082ae3d43b945d5d16563f26ec251d/numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa", size = 16600122, upload-time = "2025-05-17T21:42:14.046Z" }, - { url = "https://files.pythonhosted.org/packages/12/75/ee20da0e58d3a66f204f38916757e01e33a9737d0b22373b3eb5a27358f9/numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571", size = 15586143, upload-time = "2025-05-17T21:42:37.464Z" }, - { url = "https://files.pythonhosted.org/packages/76/95/bef5b37f29fc5e739947e9ce5179ad402875633308504a52d188302319c8/numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1", size = 18385260, upload-time = "2025-05-17T21:43:05.189Z" }, - { url = "https://files.pythonhosted.org/packages/09/04/f2f83279d287407cf36a7a8053a5abe7be3622a4363337338f2585e4afda/numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff", size = 6377225, upload-time = "2025-05-17T21:43:16.254Z" }, - { url = "https://files.pythonhosted.org/packages/67/0e/35082d13c09c02c011cf21570543d202ad929d961c02a147493cb0c2bdf5/numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06", size = 12771374, upload-time = "2025-05-17T21:43:35.479Z" }, -] - [[package]] name = "numpy" version = "2.4.2" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 
'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and 
extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 
'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", -] sdist = { url = "https://files.pythonhosted.org/packages/57/fd/0005efbd0af48e55eb3c7208af93f2862d4b1a56cd78e84309a2d959208d/numpy-2.4.2.tar.gz", hash = "sha256:659a6107e31a83c4e33f763942275fd278b21d095094044eb35569e86a21ddae", size = 20723651, upload-time = "2026-01-31T23:13:10.135Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/a1/22/815b9fe25d1d7ae7d492152adbc7226d3eff731dffc38fe970589fcaaa38/numpy-2.4.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:25f2059807faea4b077a2b6837391b5d830864b3543627f381821c646f31a63c", size = 16663696, upload-time = "2026-01-31T23:11:17.516Z" }, @@ -4785,7 +4695,7 @@ name = "nv-grouped-gemm" version = "1.1.4.post7" source = { git = "https://github.com/fanshiqing/grouped_gemm?tag=v1.1.4.post7#6dfaf60e6112166b8b82e9210b51c7f557956f0a" } dependencies = [ - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" } }, + { name = "numpy" }, { name = "setuptools" }, { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "torch", version = "2.10.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, @@ -4943,8 +4853,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cuda-python", version = "13.0.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'darwin' or extra == 'extra-7-nemo-rl-sglang' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 
'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "cuda-python", version = "13.0.1", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "numpy" }, { name = "typing-extensions" }, ] wheels = [ @@ -4969,8 +4878,7 @@ version = "0.43.0rc2.dev98+g905018803" source = { git = "https://github.com/NVIDIA/Model-Optimizer?rev=905018803414702e414a86716484ed4115b37ba6#905018803414702e414a86716484ed4115b37ba6" } dependencies = [ { name = "ninja" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 
'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "numpy" }, { name = "nvidia-ml-py" }, { name = "omegaconf" }, { name = "packaging" }, @@ -5087,10 +4995,8 @@ name = "onnx" version = "1.21.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 's390x' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "ml-dtypes", version = "0.5.4", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 's390x' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "ml-dtypes" }, + { name = "numpy" }, { name = "protobuf" }, { name = "typing-extensions" }, ] @@ -5119,10 +5025,8 @@ name = "onnx-ir" version = "0.1.16" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 's390x' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra 
== 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "ml-dtypes", version = "0.5.4", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 's390x' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "ml-dtypes" }, + { name = "numpy" }, { name = "onnx" }, { name = "typing-extensions" }, ] @@ -5136,10 +5040,8 @@ name = "onnxscript" version = "0.6.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 's390x' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "ml-dtypes", version = "0.5.4", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 's390x' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and 
extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "ml-dtypes" }, + { name = "numpy" }, { name = "onnx" }, { name = "onnx-ir" }, { name = "packaging" }, @@ -5248,8 +5150,7 @@ name = "opencv-python-headless" version = "4.11.0.86" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "numpy" }, ] sdist = { url = "https://files.pythonhosted.org/packages/36/2f/5b2b3ba52c864848885ba988f24b7f105052f68da9ab0e693cc7c25b0b30/opencv-python-headless-4.11.0.86.tar.gz", hash = "sha256:996eb282ca4b43ec6a3972414de0e2331f5d9cda2b41091a49739c19fb843798", size = 95177929, upload-time = "2025-01-16T13:53:40.22Z" } wheels = [ @@ -5465,8 +5366,8 @@ name = "outlines-core" version = "0.2.11" source = { 
registry = "https://pypi.org/simple" } resolution-markers = [ - "platform_machine == 'aarch64' and sys_platform == 'linux'", "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux'", + "platform_machine == 'aarch64' and sys_platform == 'linux'", "platform_machine == 'x86_64' and sys_platform == 'linux'", "platform_machine == 's390x' and sys_platform == 'linux'", "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux'", @@ -5532,8 +5433,7 @@ name = "pandas" version = "2.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "numpy" }, { name = "python-dateutil" }, { name = "pytz" }, { name = "tzdata" }, @@ -5589,7 +5489,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "accelerate" }, { name = "huggingface-hub" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" } }, + { name = "numpy" }, { name = "packaging" }, { name = "psutil" }, { name = "pyyaml" }, @@ -6466,6 +6366,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" }, ] +[[package]] +name = "pyvers" +version = "0.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/32/99/23c73a1298b1c642d8ebdd78e1db4daf1e474152e6839df4f5c93357a3db/pyvers-0.2.2.tar.gz", hash = "sha256:205026bcd0b4c09198cb3a32f243fd179ef012882ce16d93dcb755320acd56f7", size = 12104, upload-time = "2026-01-23T14:12:07.619Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/36/bf/ea840f706b7824dd57220484465995309c8c217995ddb7ce4b262240e912/pyvers-0.2.2-py3-none-any.whl", hash = "sha256:c4696408a0b15fbaa90df33d3bc579cf23a74a73541858f5470216f12f51f3b1", size = 11569, upload-time = "2026-01-23T14:12:06.246Z" }, +] + [[package]] name = "pywin32" version = "311" @@ -6546,8 +6455,8 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "apache-tvm-ffi" }, { name = "nvidia-cutlass-dsl" }, - { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra != 
'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "torch", version = "2.10.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(sys_platform != 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "torch", version = "2.10.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "torch-c-dlpack-ext" }, ] sdist = { url = "https://files.pythonhosted.org/packages/7b/db/d2e480fd71c38b88ffcbf40298d604400c64e0ffcaa06d6aa61a87b2673a/quack_kernels-0.3.9.tar.gz", hash = "sha256:4fd272f52142e408a591b94be7c6a0261e222e034e599bce6da827eeae8ad04d", size = 212760, upload-time = 
"2026-04-05T06:34:58.642Z" } @@ -6957,8 +6866,7 @@ version = "1.7.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "joblib" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "numpy" }, { name = "scipy" }, { name = "threadpoolctl" }, ] @@ -6981,8 +6889,7 @@ name = "scipy" version = "1.16.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "numpy" }, ] sdist = { url = "https://files.pythonhosted.org/packages/f5/4a/b927028464795439faec8eaf0b03b011005c487bb2d07409f28bf30879c4/scipy-1.16.1.tar.gz", hash = "sha256:44c76f9e8b6e8e488a586190ab38016e4ed2f8a038af7cd3defa903c0a2238b3", size = 30580861, upload-time = "2025-07-27T16:33:30.834Z" } wheels = [ @@ -7170,7 +7077,7 @@ dependencies = [ { name = "modelscope" }, { name = "msgspec" }, { name = "ninja" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" } }, + { name = "numpy" }, { name = "nvidia-cutlass-dsl" }, { name = "nvidia-ml-py" }, { name = "openai" }, @@ -7270,8 +7177,7 @@ name = "skops" version = "0.13.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and 
extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "numpy" }, { name = "packaging" }, { name = "prettytable" }, { name = "scikit-learn" }, @@ -7364,8 +7270,7 @@ version = "0.13.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "numpy" }, ] sdist = { url = "https://files.pythonhosted.org/packages/e1/41/9b873a8c055582859b239be17902a85339bec6a30ad162f98c9b0288a2cc/soundfile-0.13.1.tar.gz", hash = "sha256:b2c68dab1e30297317080a5b43df57e302584c49e2942defdde0acccc53f0e5b", size = 46156, upload-time = "2025-01-25T09:17:04.831Z" } wheels = [ @@ -7392,7 +7297,7 @@ name = "soxr" version = "1.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" } }, + { name = "numpy" }, ] sdist = { url = "https://files.pythonhosted.org/packages/42/7e/f4b461944662ad75036df65277d6130f9411002bfb79e9df7dff40a31db9/soxr-1.0.0.tar.gz", hash = "sha256:e07ee6c1d659bc6957034f4800c60cb8b98de798823e34d2a2bba1caa85a4509", size = 171415, upload-time = "2025-09-07T13:22:21.317Z" } wheels = [ @@ -7764,8 +7669,7 @@ dependencies = [ { name = "absl-py" }, { name = "grpcio" }, { name = "markdown" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 
'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "numpy" }, { name = "packaging" }, { name = "pillow" }, { name = "protobuf" }, @@ -7787,15 +7691,45 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/73/c6/825dab04195756cf8ff2e12698f22513b3db2f64925bdd41671bfb33aaa5/tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl", hash = "sha256:ef687163c24185ae9754ed5650eb5bc4d84ff257aabdc33f0cc6f74d8ba54530", size = 6590363, upload-time = "2023-10-23T21:23:35.583Z" }, ] +[[package]] +name = "tensordict" +version = "0.12.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cloudpickle" }, + { name = "importlib-metadata" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pyvers" }, + { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "torch", version = "2.10.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/4d/00/bd86f3df83d4718a6d57768cffbe235440f52cb7caafa77d19c3661ec5a2/tensordict-0.12.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:ce53dd911d63719edd5462e1d6dfae4bd55e4b5fa5bceb7fac9b8b0749a715a5", size = 889359, upload-time = "2026-04-20T15:11:35.593Z" }, + { url = "https://files.pythonhosted.org/packages/ef/61/4b51ab1892155fa6fc3373773cdea7beb56e5636a6484459dd7452636bca/tensordict-0.12.2-cp313-cp313-manylinux_2_28_aarch64.whl", hash = 
"sha256:e005a04d00b499a1a36883338145ae014ddd53a9498e369535d4c499c8867928", size = 532982, upload-time = "2026-04-20T15:11:37.25Z" }, + { url = "https://files.pythonhosted.org/packages/56/49/a851c2c610ed6d08714d4c6af91287cfb250a70fa166678d09f48e532cea/tensordict-0.12.2-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:49b575a39dc1a8de138e6e519329b55eae39fba721ff43aa4e0c08afcacd5fe3", size = 536753, upload-time = "2026-04-20T15:11:38.707Z" }, + { url = "https://files.pythonhosted.org/packages/14/31/14da5697d6e57740a507fdb0c2daa424f67603647071e123b9a1f5293f00/tensordict-0.12.2-cp313-cp313-win_amd64.whl", hash = "sha256:2710b7ce7730c544d2519b0b466a0d47a61319e552c49da54d454d41ccef452f", size = 586005, upload-time = "2026-04-20T15:11:40.365Z" }, + { url = "https://files.pythonhosted.org/packages/2a/2e/b9509652ddd69de4b738cef8f246072667fc51a91be026f005f3e666657d/tensordict-0.12.2-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:70b185f0f9545f5e79d64383498a933b780cd14d017b447556e4d4ed1e0f3e33", size = 894783, upload-time = "2026-04-20T15:11:42.12Z" }, + { url = "https://files.pythonhosted.org/packages/d3/d3/41a21801bbc1c6cf6374c4f7271904815095a5b3375f22c14d0f7e02050e/tensordict-0.12.2-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:0c881da6d48189357ab414f9cb3394a6d0513076b2287c3e7f9a47e5d0ab1730", size = 534421, upload-time = "2026-04-20T15:11:43.496Z" }, + { url = "https://files.pythonhosted.org/packages/d0/d3/828793ad818935b300fb61eb0c9041c572bb6f8d124cef43e6323a6f6b4d/tensordict-0.12.2-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:8294507ea68b37c342087113f651bd36f823b805bd7cabe9440c587d507fc744", size = 538294, upload-time = "2026-04-20T15:11:44.814Z" }, + { url = "https://files.pythonhosted.org/packages/d8/eb/43e87ba618ed1844e5a537258381966e12fc0b032bfb57d617cb7395d818/tensordict-0.12.2-cp313-cp313t-win_amd64.whl", hash = "sha256:3e1a93bffe9d459616724327c8f3e0b05d63737db94232d69913ffa5af2b81d1", size = 596851, upload-time = "2026-04-20T15:11:46.292Z" }, + { url = "https://files.pythonhosted.org/packages/bd/ab/d8addd40ca726dc62807d1a5911e950cba93eda20a23c8ae3b5bfbe33c03/tensordict-0.12.2-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:9264c2c9048ea343f3ef35403737f1840a3726cfa5788f832d377f171f5af88e", size = 889474, upload-time = "2026-04-20T15:11:47.972Z" }, + { url = "https://files.pythonhosted.org/packages/c7/3c/455b6dbb18ac13c7972d2cf2af0ce0f4ed760fd230a3608a5822ce2a6384/tensordict-0.12.2-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:bda6249a2abecd4e31d38dde4d76d75b826da0169cbb5e1570b6c63ed0ee503c", size = 533680, upload-time = "2026-04-20T15:11:49.621Z" }, + { url = "https://files.pythonhosted.org/packages/dd/d7/048b7955f0389047f8536ae87b97203f19f2aee1f11b592d1c1ff741892b/tensordict-0.12.2-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:7be1a5ac3c9f4f4dd52a8b5f08c0f13412d4ebd9adcc172f2820c8b58fbad5fd", size = 536992, upload-time = "2026-04-20T15:11:51.017Z" }, + { url = "https://files.pythonhosted.org/packages/61/75/095c3b38edf9b931ca2f7070f0b58fb586e653176b0e7ec3ecc70691ece2/tensordict-0.12.2-cp314-cp314-win_amd64.whl", hash = "sha256:23263a366a5194a28556910faa28e918da77c95f9c6b8d7af7164996a6fe955a", size = 586233, upload-time = "2026-04-20T15:11:52.507Z" }, + { url = "https://files.pythonhosted.org/packages/39/d9/2d4efbdbeccde24db630932ce513e85d015fb344a5f3654bc6c73a6d0e86/tensordict-0.12.2-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:06c285ac0948cbc062d3c8222b5a6419b215c7db5f9f9661247ff0100b3db00a", size = 893898, upload-time = 
"2026-04-20T15:11:53.846Z" }, + { url = "https://files.pythonhosted.org/packages/39/27/e3f5334e6a731cdd4396234c96f8c769f4a20d660d04cca6bd4e52156ec8/tensordict-0.12.2-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:29fb0719a75b17abe0b20c6a7630418df73f1f333be7ad482159624f7a8d6811", size = 534013, upload-time = "2026-04-20T15:11:55.569Z" }, + { url = "https://files.pythonhosted.org/packages/f7/a2/d38921d633510c554f3f9238ee60f367edf538326d480f79c770f2f2b69e/tensordict-0.12.2-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:c088c6545cd41bb80a6fef7b61cdd11709657bc81f4996e70677c3385fdbb0e4", size = 537720, upload-time = "2026-04-20T15:11:57.276Z" }, + { url = "https://files.pythonhosted.org/packages/1a/bb/5efddebb17fa54067ff4e16bba839a6999274d3fb6feabfeebd13e9e8f6d/tensordict-0.12.2-cp314-cp314t-win_amd64.whl", hash = "sha256:2e8ad583e299afd8dfe0f4b9a00f751c844c482010c2bb22c8029be071af826d", size = 596848, upload-time = "2026-04-20T15:11:59.001Z" }, +] + [[package]] name = "tensorstore" version = "0.1.76" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 's390x' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "ml-dtypes", version = "0.5.4", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 's390x' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { 
registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "ml-dtypes" }, + { name = "numpy" }, ] sdist = { url = "https://files.pythonhosted.org/packages/ff/ae/947a9f232de7319b664ed8d278e9e0363e9294da73fd422c687ac4eb070e/tensorstore-0.1.76.tar.gz", hash = "sha256:ed0d565e7a038a84b1b5b5d9f7397caec200b53941d8889f44b7f63dd6abffe7", size = 6869230, upload-time = "2025-07-02T21:34:03.773Z" } wheels = [ @@ -7923,39 +7857,49 @@ version = "2.10.0+cu130" source = { registry = "https://download.pytorch.org/whl/cu130" } resolution-markers = [ "platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - 
"platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 
'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 
's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 
'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and 
sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", ] dependencies = [ { name = "cuda-bindings", version = "13.0.3", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, @@ -7980,8 +7924,8 @@ dependencies = [ { name = "nvidia-nvtx", marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "setuptools", marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "sympy", marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "triton", version = "3.6.0", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and 
platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "triton", version = "3.6.0", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform 
== 'linux' and extra != 'extra-7-nemo-rl-sglang') or (platform_machine == 's390x' and sys_platform == 'linux') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "triton", version = "3.6.0", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or 
(platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 's390x' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "triton", version = "3.6.0", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 's390x' and sys_platform == 'linux') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and 
extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "typing-extensions", marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, ] wheels = [ @@ -8004,8 +7948,8 @@ name = "torch-c-dlpack-ext" version = "0.1.5" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 
'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "torch", version = "2.10.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(sys_platform != 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "torch", version = "2.10.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/37/de/921b6491efce5c389a5ef9bbed3d2d6660005840dae488124173180859ab/torch_c_dlpack_ext-0.1.5.tar.gz", hash = "sha256:d06f0357d575d22a168cc77acb9020fc4bae30968ceb6718a055dcbe92bacabe", size = 12913, upload-time = "2026-01-12T11:25:08.484Z" } wheels = [ @@ -8050,8 +7994,8 @@ name = 
"torchao" version = "0.14.1" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "platform_machine == 'aarch64' and sys_platform == 'linux'", "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux'", + "platform_machine == 'aarch64' and sys_platform == 'linux'", "platform_machine == 'x86_64' and sys_platform == 'linux'", "platform_machine == 's390x' and sys_platform == 'linux'", "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux'", @@ -8100,21 +8044,21 @@ version = "2.10.0+cu130" source = { registry = "https://download.pytorch.org/whl/cu130" } resolution-markers = [ "platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 
'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' 
and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", ] dependencies = [ { name = "torch", version = "2.10.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "sys_platform != 'darwin'" }, @@ -8170,8 +8114,7 @@ resolution-markers = [ "platform_machine == 's390x' and sys_platform == 'darwin'", ] dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "numpy", marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 
'extra-7-nemo-rl-vllm')" }, { name = "pillow", marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, ] @@ -8200,43 +8143,52 @@ version = "0.25.0+cu130" source = { registry = "https://download.pytorch.org/whl/cu130" } resolution-markers = [ "platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and 
extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 'aarch64' and sys_platform == 'linux' 
and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 
'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and 
sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 
'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", ] dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (sys_platform != 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "numpy", marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 
'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "pillow", marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "torch", version = "2.10.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, ] @@ -8276,6 +8228,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359, upload-time = "2024-04-19T11:11:46.763Z" }, ] +[[package]] +name = "transferqueue" +version = "0.1.7.dev0" +source = { git = "https://github.com/Ascend/TransferQueue.git?rev=b266d39#b266d39a15aae114730de36cf8317b6285436f7f" } +dependencies = [ + { name = "hydra-core" }, + { name = "msgspec" }, + { name = "numpy" }, + { name = "omegaconf" }, + { name = "psutil" }, + { name = "pyzmq" }, + { name = "ray", extra = ["default"] }, + { name = "tensordict" }, +] + [[package]] name = "transformer-engine" version = "2.14.1+366798e" @@ -8294,8 +8261,7 @@ version = "5.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" 
}, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "numpy" }, { name = "packaging" }, { name = "pyyaml" }, { name = "regex" }, @@ -8317,14 +8283,17 @@ resolution-markers = [ "platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", + "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", - "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 
'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", - "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang'", + "platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'", + "platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'", ] wheels = [ { url = "https://download-r2.pytorch.org/whl/triton-3.6.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:58d57d6796b0004076315433526fe9d4af42044d430afdee1e6cd42a76bd6d09", upload-time = "2026-01-22T23:13:51Z" }, @@ -8342,8 +8311,8 @@ name = "triton" version = "3.6.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "platform_machine == 's390x' and sys_platform == 'linux'", "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux'", + "platform_machine == 's390x' and sys_platform == 'linux'", ] wheels = [ { url = "https://files.pythonhosted.org/packages/3c/12/34d71b350e89a204c2c7777a9bba0dcf2f19a5bfdd70b57c4dbc5ffd7154/triton-3.6.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:448e02fe6dc898e9e5aa89cf0ee5c371e99df5aa5e8ad976a80b93334f3494fd", size = 176133521, upload-time = "2026-01-20T16:16:13.321Z" }, @@ -8558,7 +8527,7 @@ dependencies = [ { name = "depyf", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, { name = "diskcache", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, { name = "einops", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, - { name = "fastapi", extra = ["standard"], marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 
'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "fastapi", extra = ["standard"], marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 
'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "filelock", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, { name = "flashinfer-python", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, { name = "gguf", marker = "(platform_machine != 
'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, @@ -8570,12 +8539,12 @@ dependencies = [ { name = "llguidance", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, { name = "lm-format-enforcer", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, { name = "mcp", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, - { name = "mistral-common", extra = ["image"], marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or 
(platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "mistral-common", extra = ["image"], marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 
'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "model-hosting-container-standards", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, { name = "msgspec", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, { name = "ninja", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, { name = "numba", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, + { name = "numpy", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, { name = "nvidia-cudnn-frontend", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, { name = "nvidia-cutlass-dsl", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, { name = "openai", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, @@ -8599,7 +8568,7 @@ dependencies = [ { name = "pyyaml", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, { name = "pyzmq", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, { name = "quack-kernels", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, - { name = "ray", extra = ["cgraph"], marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 
'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-fsdp' and extra != 
'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "ray", extra = ["cgraph"], marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 
'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "regex", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, { name = "requests", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, { name = "sentencepiece", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, @@ -8608,12 +8577,12 @@ dependencies = [ { name = "six", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, { name = "tiktoken", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, { name = "tokenizers", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, - { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "torch", version = "2.10.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and 
sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform 
== 'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "torchaudio", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "torchaudio", version = "2.10.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and 
extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "torchvision", version = "0.25.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 
'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "torchvision", version = "0.25.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 
'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "torch", version = "2.10.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' 
and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'darwin' and 
sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "torchaudio", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "torchaudio", version = "2.10.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and 
extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') 
or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "torchvision", version = "0.25.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "torchvision", version = "0.25.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 
'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 
'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "tqdm", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, { name = "transformers", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, { name = "typing-extensions", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'" }, @@ -8644,7 +8613,7 @@ dependencies = [ { name = "depyf", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, { name = "diskcache", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, { name = "einops", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, - { name = "fastapi", extra = ["standard"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "fastapi", extra = ["standard"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 
'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "filelock", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, { name = "flashinfer-python", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, { name = "gguf", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, @@ -8656,12 +8625,12 @@ dependencies = [ { name = "llguidance", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, { name = "lm-format-enforcer", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, { name = "mcp", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, - { name = "mistral-common", extra = ["image"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "mistral-common", extra = ["image"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-sglang' 
and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "model-hosting-container-standards", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, { name = "msgspec", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, { name = "ninja", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, { name = "numba", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, + { name = "numpy", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, { name = "nvidia-cudnn-frontend", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, { name = "nvidia-cutlass-dsl", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, { name = "openai", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, @@ -8685,7 +8654,7 @@ dependencies = [ { name = "pyyaml", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, { name = "pyzmq", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, { name = "quack-kernels", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, - { name = "ray", extra = ["cgraph"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "ray", extra = ["cgraph"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux' 
and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "regex", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, { name = "requests", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, { name = "sentencepiece", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, @@ -8817,7 +8786,7 @@ dependencies = [ { name = "depyf", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "diskcache", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "einops", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "fastapi", extra = ["standard"], marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "fastapi", extra = ["standard"], marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'x86_64' and extra == 
'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "filelock", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "flashinfer-python", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "gguf", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, @@ -8829,12 +8798,12 @@ dependencies = [ { name = "llguidance", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "lm-format-enforcer", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "mcp", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "mistral-common", extra = ["image"], marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "mistral-common", extra = ["image"], marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 
'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "model-hosting-container-standards", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "msgspec", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "ninja", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "numba", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "numpy", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-cudnn-frontend", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-cutlass-dsl", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "openai", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, @@ -8858,7 +8827,7 @@ dependencies = [ { name = "pyyaml", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "pyzmq", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "quack-kernels", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "ray", extra = ["cgraph"], marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "ray", extra = ["cgraph"], marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and sys_platform == 
'linux' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "regex", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "requests", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "sentencepiece", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, @@ -9091,8 +9060,7 @@ version = "1.0.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "braceexpand" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-sglang'" }, + { name = "numpy" }, { name = "pyyaml" }, ] sdist = { url = "https://files.pythonhosted.org/packages/5a/3a/68800d92e065cf4750ebecf973b13979c0c929b439e1293012938862038d/webdataset-1.0.2.tar.gz", hash = "sha256:7f0498be827cfa46cc5430a58768a24e2c6a410676a61be1838f53d61afdaab4", size = 80090, upload-time = "2025-06-19T23:26:21.945Z" } @@ -9256,11 +9224,10 @@ name = "xgrammar" version = "0.1.33" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') 
or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "numpy" }, { name = "pydantic" }, - { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm')" }, - { name = "torch", version = "2.10.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(sys_platform != 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 
'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm')" }, + { name = "torch", version = "2.10.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-mcore') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" }, { name = "transformers" }, { name = "triton", version = "3.6.0", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "typing-extensions" },