diff --git a/.claude/skills/evaluation/SKILL.md b/.claude/skills/evaluation/SKILL.md
index cf93c392fa6..16f1b2cb6a5 100644
--- a/.claude/skills/evaluation/SKILL.md
+++ b/.claude/skills/evaluation/SKILL.md
@@ -48,7 +48,7 @@ Run `nel --version`; if missing, instruct `pip install nemo-evaluator-launcher`.
 1. Read the task reference file(s).
 2. Use `recipes/examples/example_eval.yaml` as the base.
 3. Copy the YAML fragment(s) into `evaluation.tasks`, applying any per-task notes.
-4. **MLflow auto-export is on by default** — copy the `export.mlflow` block from `example_eval.yaml` verbatim. The defaults inside that block (Hydra-interpolated `experiment_name`, `description`, `tags`) only need `tracking_uri` filled in Step 4. See `example_eval.yaml` for the canonical block.
+4. **MLflow auto-export is on by default in `example_eval.yaml`** — it needs **two** pieces, both already present there: (a) the **trigger** `execution.auto_export.destinations: [mlflow]` (without it the run is *not* uploaded), and (b) the `export.mlflow` block that configures the upload. In the `export.mlflow` block use **literal** values for `experiment_name` / `description` / `tags` — replace every `CHANGEME-served-model-name` placeholder with the actual `served_model_name` and fill in the real sampling params. Do **not** use `${deployment.*}` / `${evaluation.*}` cross-references: with auto-export on, NEL resolves the export block at submit time in a scope without those nodes and fails with `Interpolation key '...' not found` (`${oc.env:USER}` is fine — it's an env var). Because these literals can't interpolate, keep the `temperature` / `top_p` / `max_new_tokens` values in both `tags` and `description` **equal to** `evaluation.nemo_evaluator_config.config.params` and update them in the same edit — the tags are the only queryable record of sampling in MLflow (NEL doesn't log them as run params), so a stale tag silently misreports the run. Fill `tracking_uri` in Step 4.
 5. Proceed to Step 3, then Step 4, then Step 7.5/8. Skip Step 2's 5-question flow.
 
 ---
diff --git a/.claude/skills/evaluation/recipes/examples/example_eval.yaml b/.claude/skills/evaluation/recipes/examples/example_eval.yaml
index 0b6ea6b2e9b..c3a48d8c58b 100644
--- a/.claude/skills/evaluation/recipes/examples/example_eval.yaml
+++ b/.claude/skills/evaluation/recipes/examples/example_eval.yaml
@@ -45,9 +45,9 @@ execution:
   walltime: "04:00:00"
   mounts:
     mount_home: false
-  auto_export:
-    destinations:
-      - mlflow
+  auto_export:          # REQUIRED trigger for auto-export. Without this, the
+    destinations:       # export.mlflow block below is ignored and the run is
+      - mlflow          # NOT uploaded — you'd have to `nel export` it by hand.
 deployment:
   env_vars:
     HF_TOKEN: host:HF_TOKEN
@@ -95,18 +95,29 @@ evaluation:
               n_samples: 16
 
 export:
+  # Use LITERAL values below — NOT ${deployment.*} / ${evaluation.*} cross-refs.
+  # With auto_export enabled (above), NEL resolves this block at SUBMIT time in a
+  # scope that does NOT include `deployment` / `evaluation`, so cross-references
+  # fail hard: "Interpolation key 'deployment.served_model_name' not found".
+  # `${oc.env:USER}` (an env-var interpolation) is fine.
+  #
+  # CAUTION — these literals can drift. The temperature / top_p / max_new_tokens tags
+  # are the ONLY queryable record of the sampling config in MLflow (NEL does not log
+  # them as run params), so keep them. Since they can't be interpolated, the tags and
+  # the copies in `description` MUST EQUAL evaluation.nemo_evaluator_config.config.params
+  # above. When you change the sampling params (or served_model_name), update every
+  # literal below in the SAME edit, or MLflow will misreport the run.
   mlflow:
     tracking_uri: ???
-    experiment_name: ${oc.env:USER}/${deployment.served_model_name}
-    description: '${oc.env:USER}/${deployment.served_model_name} | T=${evaluation.nemo_evaluator_config.config.params.temperature}, top_p=${evaluation.nemo_evaluator_config.config.params.top_p},
-      max_new_tokens=${evaluation.nemo_evaluator_config.config.params.max_new_tokens}'
+    experiment_name: ${oc.env:USER}/CHANGEME-served-model-name
+    description: 'CHANGEME-served-model-name | T=1.0, top_p=0.95, max_new_tokens=65536'
     log_logs: true
     log_artifacts: true
     only_required: false
     skip_existing: false
     tags:
       framework: vllm
-      model: ${deployment.served_model_name}
-      temperature: '${evaluation.nemo_evaluator_config.config.params.temperature}'
-      top_p: '${evaluation.nemo_evaluator_config.config.params.top_p}'
-      max_new_tokens: '${evaluation.nemo_evaluator_config.config.params.max_new_tokens}'
+      model: CHANGEME-served-model-name
+      temperature: '1.0'
+      top_p: '0.95'
+      max_new_tokens: '65536'