@@ -26,7 +26,7 @@ speculative decoding draft-model training, etc.
2626```bash
2727pip install datasets huggingface_hub pyyaml
2828huggingface-cli login # required for gated datasets
29- ```
29+ ```text
3030
3131### Build a Nemotron PT v3 dataset
3232
@@ -39,20 +39,20 @@ python make_nemotron_ptv3_dataset.py --mode train --output-dir /tmp/ptv3_train
3939
4040# Use a custom dataset mix
4141python make_nemotron_ptv3_dataset.py --config my_mix.yaml --output-dir /tmp/ptv3_custom
42- ```
42+ ```text
4343
4444### Build a Nemotron PT v2 dataset
4545
4646```bash
4747python make_nemotron_ptv2_dataset.py --output-dir /tmp/ptv2_gen
4848python make_nemotron_ptv2_dataset.py --mode train --output-dir /tmp/ptv2_train
49- ```
49+ ```text
5050
5151### Build a general-purpose mixed dataset
5252
5353```bash
5454python make_dataset.py --config example_data_config.yaml --output-dir /tmp/mixed
55- ```
55+ ```text
5656
5757## Dataset Modes
5858
@@ -69,13 +69,13 @@ The `generate` mode produces conversation skeletons that are fed to a target mod
6969via `tools/launcher/common/query.py` (vLLM or TRT-LLM). The output becomes training
7070data for a draft model (e.g. EAGLE3 speculative decoding) or a distilled student:
7171
72- ```
72+ ```text
7373make_nemotron_ptv3_dataset.py --mode generate → skeleton.jsonl
7474 ↓
7575query.py (target model generates responses turn-by-turn)
7676 ↓
7777training data for draft model / student
78- ```
78+ ```text
7979
8080## Augmentations
8181
@@ -95,7 +95,7 @@ augmentations:
9595 - type: system_prompt
9696 content: "You are a helpful assistant."
9797 enabled: false # disable without deleting
98- ```
98+ ```text
9999
100100## Dataset Mix Config (`nemotron_ptv3_datasets.yaml`)
101101
@@ -111,7 +111,7 @@ datasets:
111111 - repo_id: nvidia/OpenMathReasoning-mini
112112 splits: [train]
113113 augment: false # multilingual — skip language-redirect augmentation
114- ```
114+ ```text
115115
116116## Output Format
117117
@@ -123,6 +123,6 @@ Every output row is a JSONL object with a single `messages` key:
123123 {"role": "user", "content": "What is 2+2?"},
124124 {"role": "assistant", "content": "4"}
125125]}
126- ```
126+ ```text
127127
128128In `generate` mode, assistant turns are stripped so the row ends with a user turn.
0 commit comments