sgl-project · nathanrchn · May 10, 2026
@@ -0,0 +1,42 @@
+{
+  "architectures": ["DFlashDraftModel"],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "auto_map": {
+    "AutoModel": "dflash.DFlashDraftModel"
+  },
+  "block_size": 16,
+  "bos_token_id": 1,
+  "dflash_config": {
+    "mask_token_id": 16,
+    "target_layer_ids": [1, 4, 7, 10, 13]
+  },
+  "dtype": "bfloat16",
+  "eos_token_id": 7,
+  "head_dim": 64,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 6144,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 128000,
+  "model_type": "qwen3",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 8,
+  "num_key_value_heads": 8,
+  "num_target_layers": 16,
+  "rms_norm_eps": 1e-05,
+  "rope_theta": 1000000.0,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "vocab_size": 65536
+}
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+ROOT_DIR=$(dirname $SCRIPT_DIR)
+export TORCHINDUCTOR_CACHE_DIR=$ROOT_DIR/cache/compiled_kernels
+export SPECFORGE_DATA_NUM_PROC=32
+NUM_GPUS=${1:-8}
+
+ATTENTION_BACKEND=${2:-flex_attention}
+
+torchrun \
+    --standalone \
+    --nproc_per_node $NUM_GPUS \
+    $ROOT_DIR/scripts/train_dflash.py \
+    --target-model-path LiquidAI/LFM2.5-1.2B-Instruct \
+    --draft-config-path $ROOT_DIR/configs/lfm2.5-1.2b-instruct-dflash.json \
+    --train-data-path $ROOT_DIR/cache/dataset/perfectblend_lfm2.5-1.2b_regen.jsonl \
+    --output-dir $ROOT_DIR/outputs/lfm2.5-1.2b-instruct-dflash-perfectblend-8layers \
+    --num-epochs 6 \
+    --batch-size 2 \
+    --accumulation-steps 2 \
+    --learning-rate 6e-4 \
+    --warmup-ratio 0.04 \
+    --max-grad-norm 1.0 \
+    --max-length 4096 \
+    --chat-template lfm \
+    --attention-backend $ATTENTION_BACKEND \
+    --num-anchors 512 \
+    --loss-decay-gamma 7.0 \
+    --log-interval 50 \
+    --save-interval 10000 \
+    --report-to wandb \
+    --wandb-project specforge-lfm2.5-1.2b-instruct-dflash \
+    --target-model-backend sglang \
+    --block-size 16 \
+    --num-anchors 512 \
+    --wandb-name lfm2.5-1.2b-instruct-dflash-perfectblend-8layers \
+    # --resume
@@ -119,6 +119,16 @@ def get_all_template_names(self) -> List[str]:
     ),
 )
 
+TEMPLATE_REGISTRY.register(
+    name="lfm",
+    template=ChatTemplate(
+        assistant_header="<|im_start|>assistant\n",
+        user_header="<|im_start|>user\n",
+        system_prompt="",
+        end_of_turn_token="<|im_end|>\n",
+    ),
+)
+
 TEMPLATE_REGISTRY.register(
     name="qwen2-vl",
     template=ChatTemplate(