Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions examples/configs/sft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ sft:
val_at_start: true
val_at_end: false
seed: 42
# If true, only the final message in each conversation is unmasked for loss
# computation, regardless of its role. If false, all assistant messages are
# unmasked.
only_unmask_final: false

checkpointing:
enabled: true
Expand Down
6 changes: 6 additions & 0 deletions nemo_rl/algorithms/sft.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ class SFTConfig(TypedDict):
# final checkpoint has validation metrics, which is required for get_best_checkpoint_path().
val_at_end: bool
seed: int
# If True, only the final message in each conversation is unmasked for loss
# computation, regardless of role. If False, all messages with role in
# `roles_to_train_on` (currently hard-coded to ["assistant"]) are unmasked.
only_unmask_final: bool


class MasterConfig(BaseModel, extra="allow"):
Expand Down Expand Up @@ -271,6 +275,7 @@ def validate(
add_loss_mask_to_message_log(
val_batch["message_log"],
roles_to_train_on=["assistant"],
only_unmask_final=master_config.sft["only_unmask_final"],
)

cat_and_padded, input_lengths = batched_message_log_to_flat_message(
Expand Down Expand Up @@ -434,6 +439,7 @@ def sft_train(
add_loss_mask_to_message_log(
batch["message_log"],
roles_to_train_on=["assistant"],
only_unmask_final=master_config.sft["only_unmask_final"],
)

cat_and_padded, input_lengths = batched_message_log_to_flat_message(
Expand Down
3 changes: 3 additions & 0 deletions tests/unit/reference_configs/sft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ sft:
val_at_start: true
val_at_end: false
seed: 42
# If true, only the final message in each conversation is unmasked for loss
# computation, regardless of its role. If false, all assistant messages are
# unmasked.
only_unmask_final: false

checkpointing:
enabled: true
Expand Down
Loading