|
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
| 15 | + |
| 16 | +"""DFlash E2E regression tests. |
| 17 | +
|
| 18 | +Tests the full DFlash pipeline using Qwen3-0.6B and the synthetic dataset |
| 19 | +(examples/dataset/synthetic_conversations_1k.jsonl). Matches the configuration |
| 20 | +in tools/launcher/examples/Qwen/Qwen3-0.6B/hf_online_dflash.yaml. |
| 21 | +
|
| 22 | +Convergence baseline (from L40 run): |
| 23 | + Step 100 (epoch 0.2): loss=6.59 acc=0.079 |
| 24 | + Step 500 (epoch 1.0): loss=1.78 acc=0.525 |
| 25 | + Step 1500 (epoch 3.0): loss=1.11 acc=0.595 |
| 26 | +""" |
| 27 | + |
| 28 | +import json |
| 29 | +import os |
| 30 | + |
| 31 | +import pytest |
| 32 | +from _test_utils.examples.run_command import MODELOPT_ROOT, run_example_command |
| 33 | + |
# Location of the DFlash training recipe shipped with the repo.
DFLASH_YAML = str(
    MODELOPT_ROOT / "modelopt_recipes" / "general" / "speculative_decoding" / "dflash.yaml"
)

# The Qwen3-0.6B launcher example directory holds the training chat template.
_QWEN_EXAMPLE_DIR = MODELOPT_ROOT / "tools" / "launcher" / "examples" / "Qwen" / "Qwen3-0.6B"
CHAT_TEMPLATE = str(_QWEN_EXAMPLE_DIR / "chat_template_train.jinja")

# Synthetic conversation dataset used as training data for every test here.
SYNTH_DATA = str(MODELOPT_ROOT / "examples" / "dataset" / "synthetic_conversations_1k.jsonl")

# Hydra-style CLI overrides shared by the training and resume tests.
# Match tools/launcher/examples/Qwen/Qwen3-0.6B/hf_online_dflash.yaml
_DFLASH_OVERRIDES = [
    f"data.data_path={SYNTH_DATA}",
    f"data.chat_template={CHAT_TEMPLATE}",
    "training.training_seq_len=512",
    "training.per_device_train_batch_size=2",
    "training.logging_steps=100",
    "training.answer_only_loss=true",
    "dflash.dflash_block_size=8",
    "dflash.dflash_mask_token_id=151669",
    "dflash.dflash_use_torch_compile=False",
    "dflash.dflash_architecture_config.num_hidden_layers=2",
]
| 63 | + |
| 64 | + |
| 65 | +@pytest.fixture(scope="session") |
| 66 | +def qwen3_model_name(): |
| 67 | + """Qwen3-0.6B model name (downloaded from HF on first use).""" |
| 68 | + return "Qwen/Qwen3-0.6B" |
| 69 | + |
| 70 | + |
| 71 | +@pytest.fixture(scope="session") |
| 72 | +def dflash_output_dir(tmp_path_factory): |
| 73 | + return tmp_path_factory.mktemp("dflash_output") |
| 74 | + |
| 75 | + |
def test_dflash_training(qwen3_model_name, dflash_output_dir):
    """Train DFlash on Qwen3-0.6B and validate loss convergence.

    Runs launch_train.sh with the shared overrides, then checks that a
    checkpoint was written and that the logged training loss decreased to a
    sane final value (baseline: ~1.1 on L40).
    """
    output_dir = str(dflash_output_dir / "dflash-qwen3-0.6b")
    overrides = [
        f"model.model_name_or_path={qwen3_model_name}",
        f"training.output_dir={output_dir}",
        "training.num_train_epochs=3",
        "training.save_steps=500",
        *_DFLASH_OVERRIDES,
    ]

    run_example_command(
        ["./launch_train.sh", "--config", DFLASH_YAML, *overrides],
        "speculative_decoding",
    )

    # Guard first: a missing output dir previously surfaced as a confusing
    # FileNotFoundError from os.listdir instead of a clean assertion failure.
    assert os.path.isdir(output_dir), f"Training output directory not created: {output_dir}"

    # A checkpoint must exist: either the modelopt state file or at least one
    # HF-style checkpoint-* subdirectory.
    has_modelopt_state = os.path.exists(os.path.join(output_dir, "modelopt_state.pth"))
    has_checkpoint_dir = any(
        "checkpoint-" in d
        for d in os.listdir(output_dir)
        if os.path.isdir(os.path.join(output_dir, d))
    )
    assert has_modelopt_state or has_checkpoint_dir, f"No checkpoint found in {output_dir}"

    # Regression: verify loss decreased
    trainer_state = os.path.join(output_dir, "trainer_state.json")
    assert os.path.exists(trainer_state), "trainer_state.json not found"
    with open(trainer_state) as f:
        state = json.load(f)
    logs = [h for h in state.get("log_history", []) if "loss" in h]
    assert len(logs) >= 2, f"Expected at least 2 log entries, got {len(logs)}"

    first_loss = float(logs[0]["loss"])
    final_loss = float(logs[-1]["loss"])
    assert final_loss < first_loss, f"Loss did not decrease: {first_loss:.3f} -> {final_loss:.3f}"
    # Sanity: final loss should be reasonable (baseline: ~1.1 on L40)
    assert final_loss < 3.0, f"Final loss {final_loss:.3f} too high (expected < 3.0)"
| 112 | + |
| 113 | + |
def test_dflash_resume(qwen3_model_name, dflash_output_dir):
    """Resume DFlash training from the checkpoint left by the training test.

    Reuses the same output directory and raises the epoch count so the
    launcher picks up the previously saved checkpoint.
    """
    output_dir = str(dflash_output_dir / "dflash-qwen3-0.6b")

    run_example_command(
        [
            "./launch_train.sh",
            "--config",
            DFLASH_YAML,
            f"model.model_name_or_path={qwen3_model_name}",
            f"training.output_dir={output_dir}",
            "training.num_train_epochs=4",
            "training.save_steps=5000",
            *_DFLASH_OVERRIDES,
        ],
        "speculative_decoding",
    )
| 129 | + |
| 130 | + |
def test_dflash_export(dflash_output_dir):
    """Export the trained DFlash checkpoint to deployment format.

    Runs the export script against the checkpoint produced by the training
    test and validates the exported weights and config contents.
    """
    output_dir = str(dflash_output_dir / "dflash-qwen3-0.6b")
    export_dir = str(dflash_output_dir / "dflash-export")

    export_cmd = [
        "python",
        "./scripts/export_hf_checkpoint.py",
        "--model_path",
        output_dir,
        "--export_path",
        export_dir,
    ]
    run_example_command(export_cmd, "speculative_decoding")

    # Both the weights file and the config must be present.
    for artifact in ("model.safetensors", "config.json"):
        assert os.path.exists(os.path.join(export_dir, artifact))

    with open(os.path.join(export_dir, "config.json")) as f:
        config = json.load(f)
    assert config["architectures"] == ["DFlashDraftModel"]
    assert config["model_type"] == "qwen3"
    for required_key in ("dflash_config", "block_size"):
        assert required_key in config
| 157 | + |
| 158 | + |
def test_dflash_ar_validate(dflash_output_dir):
    """AR validation on the trained DFlash checkpoint.

    Invokes scripts/ar_validate.py with a small sample budget so the run
    stays fast; relies on the checkpoint from the training test.
    """
    output_dir = str(dflash_output_dir / "dflash-qwen3-0.6b")

    # Flag -> value pairs; dict insertion order keeps argv identical.
    ar_flags = {
        "--model_path": output_dir,
        "--osl": "10",
        "--num_samples": "3",
        "--steps": "7",
    }
    cmd = ["python", "./scripts/ar_validate.py"]
    for flag, value in ar_flags.items():
        cmd += [flag, value]

    run_example_command(cmd, "speculative_decoding")
0 commit comments