NVIDIA · cjluo-nv · Jun 1, 2026 · Jun 1, 2026 · Jun 1, 2026
diff --git a/.claude/skills/evaluation/SKILL.md b/.claude/skills/evaluation/SKILL.md
@@ -48,7 +48,7 @@ Run `nel --version`; if missing, instruct `pip install nemo-evaluator-launcher`.
 1. Read the task reference file(s).
 2. Use `recipes/examples/example_eval.yaml` as the base.
 3. Copy the YAML fragment(s) into `evaluation.tasks`, applying any per-task notes.
-4. **MLflow auto-export is on by default** — copy the `export.mlflow` block from `example_eval.yaml` verbatim. The defaults inside that block (Hydra-interpolated `experiment_name`, `description`, `tags`) only need `tracking_uri` filled in Step 4. See `example_eval.yaml` for the canonical block.
+4. **MLflow auto-export is on by default in `example_eval.yaml`** — it relies on **two** pieces there, and both must be kept when you build on it: (a) the **trigger** `execution.auto_export.destinations: [mlflow]` (without it the run is *not* uploaded), and (b) the `export.mlflow` block that configures it. In the `export.mlflow` block use **literal** values for `experiment_name` / `description` / `tags` — substitute the actual `served_model_name` and sampling params. Do **not** use `${deployment.*}` / `${evaluation.*}` cross-references: with auto-export on, NEL resolves the export block at submit time in a scope without those nodes and fails with `Interpolation key '...' not found` (`${oc.env:USER}` is fine — it's an env var). Fill `tracking_uri` in Step 4.
 5. Proceed to Step 3, then Step 4, then Step 7.5/8. Skip Step 2's 5-question flow.
 
 ---

diff --git a/.claude/skills/evaluation/recipes/examples/example_eval.yaml b/.claude/skills/evaluation/recipes/examples/example_eval.yaml
@@ -45,9 +45,9 @@ execution:
   walltime: "04:00:00"
   mounts:
     mount_home: false
-  auto_export:
-    destinations:
-      - mlflow
+  auto_export:          # REQUIRED trigger for auto-export. Without this, the
+    destinations:       # export.mlflow block below is ignored and the run is
+      - mlflow          # NOT uploaded — you'd have to `nel export` it by hand.
 deployment:
   env_vars:
     HF_TOKEN: host:HF_TOKEN
@@ -95,18 +95,23 @@ evaluation:
               n_samples: 16
 
 export:
+  # Use LITERAL values below — NOT ${deployment.*} / ${evaluation.*} cross-refs.
+  # With auto_export enabled (above), NEL resolves this block at SUBMIT time in a
+  # scope that does NOT include `deployment` / `evaluation`, so cross-references
+  # fail hard: "Interpolation key 'deployment.served_model_name' not found".
+  # `${oc.env:USER}` (an env-var interpolation) is fine. When generating a config,
+  # substitute the actual served_model_name and the sampling params from `evaluation`.
   mlflow:
     tracking_uri: ???
-    experiment_name: ${oc.env:USER}/${deployment.served_model_name}
-    description: '${oc.env:USER}/${deployment.served_model_name} | T=${evaluation.nemo_evaluator_config.config.params.temperature}, top_p=${evaluation.nemo_evaluator_config.config.params.top_p},
-      max_new_tokens=${evaluation.nemo_evaluator_config.config.params.max_new_tokens}'
+    experiment_name: ${oc.env:USER}/CHANGEME-served-model-name
+    description: 'CHANGEME-served-model-name | T=1.0, top_p=0.95, max_new_tokens=65536'
     log_logs: true
     log_artifacts: true
     only_required: false
     skip_existing: false
     tags:
       framework: vllm
-      model: ${deployment.served_model_name}
-      temperature: '${evaluation.nemo_evaluator_config.config.params.temperature}'
-      top_p: '${evaluation.nemo_evaluator_config.config.params.top_p}'
-      max_new_tokens: '${evaluation.nemo_evaluator_config.config.params.max_new_tokens}'
+      model: CHANGEME-served-model-name
+      temperature: '1.0'
+      top_p: '0.95'
+      max_new_tokens: '65536'