meta-pytorch · HosseinKaviani-H · Dec 23, 2025
diff --git a/apps/sft/llama3_8b.yaml b/apps/sft/llama3_8b.yaml
@@ -26,12 +26,14 @@ optimizer:
 lr_scheduler:
   warmup_steps: 200
 
+compile:
+  enable: false
+
 training:
   local_batch_size: 8
   seq_len: 2048
   max_norm: 1.0
   steps: 1000
-  compile: false
   datasets:
     - path: "yahma/alpaca-cleaned"
       split: "train[:95%]"

diff --git a/apps/sft/main.py b/apps/sft/main.py
@@ -96,11 +96,11 @@ def record_batch_metrics(self, data_metrics: list):
     @endpoint
     async def setup(self):
         # Validate that compile is only used with flex attention
-        if self.job_config.training.compile:
+        if self.job_config.compile.enable:
             raise ValueError(
-                "training.compile=True is not currently supported. "
+                "compile.enable=True is not currently supported. "
                 "Compile is only supported with flex attention enabled, which requires PyTorch nightly. "
-                "Please set training.compile=false in your config."
+                "Please set compile.enable=false in your config."
             )
 
         # all ranks should record loss, except when PP=True. Then, only the last stage should record loss.

diff --git a/apps/sft/qwen3_8b.yaml b/apps/sft/qwen3_8b.yaml
@@ -25,12 +25,14 @@ optimizer:
 lr_scheduler:
   warmup_steps: 200
 
+compile:
+  enable: false
+
 training:
   local_batch_size: 8
   seq_len: 2048
   max_norm: 1.0
   steps: 1000
-  compile: false
   datasets:
     - path: "yahma/alpaca-cleaned"
       split: "train[:95%]"