NVIDIA-NeMo
diff --git a/‎.github/workflows/config/.secrets.baseline‎
Lines changed: 75 additions & 2 deletions b/‎.github/workflows/config/.secrets.baseline‎
Lines changed: 75 additions & 2 deletions
diff --git a/‎.github/workflows/fern-docs-preview.yml‎
Lines changed: 0 additions & 115 deletions b/‎.github/workflows/fern-docs-preview.yml‎
Lines changed: 0 additions & 115 deletions
diff --git a/‎README.md‎
Lines changed: 70 additions & 2 deletions b/‎README.md‎
Lines changed: 70 additions & 2 deletions
diff --git a/‎docs/about/release-notes/index.md‎
Lines changed: 23 additions & 0 deletions b/‎docs/about/release-notes/index.md‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎fern/assets/images/architecture-diagram.png‎
743 KB b/‎fern/assets/images/architecture-diagram.png‎
743 KB
diff --git a/‎fern/assets/images/data-curation-challenges.png‎
156 KB b/‎fern/assets/images/data-curation-challenges.png‎
156 KB
@@ -124,7 +124,7 @@
     {
       "path": "detect_secrets.filters.regex.should_exclude_file",
       "pattern": [
-        "pyproject\\.toml|\\.github/workflows/config/\\.secrets\\.baseline"
+        "pyproject\\.toml|\\.github/workflows/config/\\.secrets\\.baseline|fern/product-docs/"
       ]
     }
   ],
@@ -243,6 +243,79 @@
         "line_number": 28
       }
     ],
+    "fern/versions/v26.04/pages/curate-text/synthetic/index.mdx": [
+      {
+        "type": "Secret Keyword",
+        "filename": "fern/versions/v26.04/pages/curate-text/synthetic/index.mdx",
+        "hashed_secret": "6d9c68c603e465077bdd49c62347fe54717f83a3",
+        "is_verified": false,
+        "line_number": 72
+      }
+    ],
+    "fern/versions/v26.04/pages/curate-text/synthetic/inference-server.mdx": [
+      {
+        "type": "Secret Keyword",
+        "filename": "fern/versions/v26.04/pages/curate-text/synthetic/inference-server.mdx",
+        "hashed_secret": "ce7501007f04a6529e650f1f1b3fc0586d1d94eb",
+        "is_verified": false,
+        "line_number": 173
+      }
+    ],
+    "fern/versions/v26.04/pages/curate-text/synthetic/llm-client.mdx": [
+      {
+        "type": "Secret Keyword",
+        "filename": "fern/versions/v26.04/pages/curate-text/synthetic/llm-client.mdx",
+        "hashed_secret": "e6bdb3f031eea3001ca83dd43d7d49d65a7a6ce5",
+        "is_verified": false,
+        "line_number": 32
+      },
+      {
+        "type": "Secret Keyword",
+        "filename": "fern/versions/v26.04/pages/curate-text/synthetic/llm-client.mdx",
+        "hashed_secret": "2083c49ad8d63838a4d18f1de0c419f06eb464db",
+        "is_verified": false,
+        "line_number": 43
+      },
+      {
+        "type": "Secret Keyword",
+        "filename": "fern/versions/v26.04/pages/curate-text/synthetic/llm-client.mdx",
+        "hashed_secret": "ec3810e10fb78db55ce38b9c18d1c3eb1db739e0",
+        "is_verified": false,
+        "line_number": 127
+      },
+      {
+        "type": "Secret Keyword",
+        "filename": "fern/versions/v26.04/pages/curate-text/synthetic/llm-client.mdx",
+        "hashed_secret": "11fa7c37d697f30e6aee828b4426a10f83ab2380",
+        "is_verified": false,
+        "line_number": 134
+      },
+      {
+        "type": "Secret Keyword",
+        "filename": "fern/versions/v26.04/pages/curate-text/synthetic/llm-client.mdx",
+        "hashed_secret": "ce7501007f04a6529e650f1f1b3fc0586d1d94eb",
+        "is_verified": false,
+        "line_number": 155
+      }
+    ],
+    "fern/versions/v26.04/pages/curate-text/synthetic/multilingual-qa.mdx": [
+      {
+        "type": "Secret Keyword",
+        "filename": "fern/versions/v26.04/pages/curate-text/synthetic/multilingual-qa.mdx",
+        "hashed_secret": "2083c49ad8d63838a4d18f1de0c419f06eb464db",
+        "is_verified": false,
+        "line_number": 28
+      }
+    ],
+    "fern/versions/v26.04/pages/curate-text/synthetic/nemo-data-designer.mdx": [
+      {
+        "type": "Secret Keyword",
+        "filename": "fern/versions/v26.04/pages/curate-text/synthetic/nemo-data-designer.mdx",
+        "hashed_secret": "ce7501007f04a6529e650f1f1b3fc0586d1d94eb",
+        "is_verified": false,
+        "line_number": 183
+      }
+    ],
     "nemo_curator/models/nemotron_h_vl.py": [
       {
         "type": "Hex High Entropy String",
@@ -333,5 +406,5 @@
       }
     ]
   },
-  "generated_at": "2026-04-24T18:02:35Z"
+  "generated_at": "2026-04-27T14:07:36Z"
 }
@@ -15,6 +15,11 @@
 
 > *Part of the [NVIDIA NeMo](https://www.nvidia.com/en-us/ai-data-science/products/nemo/) software suite for managing the AI agent lifecycle.*
 
+## Updates
+
+- 2026-04: NeMo Curator 26.04 released with an upgrade to Cosmos-Xenna 0.2.0, a simplified `Resources` API, and Ray 2.54. See the [release notes](https://docs.nvidia.com/nemo/curator/latest/about/release-notes).
+- 2026-02: NeMo Curator 26.02 released with Ray-based pipeline architecture for all modalities — text, image, video, and audio.
+
 ## What You Can Do
 
 | Modality | Key Capabilities | Get Started |
@@ -38,6 +43,16 @@ python tutorials/quickstart.py
 
 ---
 
+## Architecture
+
+NeMo Curator uses a modular, Ray-based pipeline architecture. Data flows through composable processing stages — each stage handles a discrete curation task (loading, filtering, deduplication, etc.) and can be configured with independent resource requirements.
+
+<p align="center">
+  <img src="./fern/assets/images/architecture-diagram.png" alt="NeMo Curator architecture diagram showing modular pipeline stages" width="700"/>
+</p>
+
+---
+
 ## Features by Modality
 
 ### Text Curation
@@ -92,6 +107,33 @@ Prepare high-quality speech datasets for automatic speech recognition (ASR) and
 
 ---
 
+## Why Data Curation?
+
+High-quality training data is the single most important factor in building performant AI models. Raw datasets contain noise, duplicates, low-quality content, and potentially harmful material that degrade model performance and increase training costs.
+
+<p align="center">
+  <img src="./fern/assets/images/data-curation-challenges.png" alt="Common data curation challenges: quality, deduplication, filtering, and scale" width="700"/>
+</p>
+
+At scale, data curation is a **throughput maximization problem**. A typical pipeline chains stages with very different compute profiles — lightweight CPU tokenization, small GPU classifiers, large GPU inference models — and a naive sequential approach leaves most hardware idle most of the time.
+
+**Example:** Consider a pipeline with language identification (0.5B model, 1 GB VRAM, 2s/sample), tokenization (CPU-only, 1s/sample), and a 5B answer model (10 GB VRAM, 10s/sample) processing 1,000 questions on a single 102 GB GPU:
+
+| Approach | How it works | Total runtime |
+|----------|-------------|---------------|
+| **Sequential** | Process each sample through all stages, one at a time | ~13,000 seconds |
+| **NeMo Curator** | Stream batches, auto-scale replicas per stage, overlap CPU/GPU work | ~1,000 seconds |
+
+NeMo Curator achieves this in three ways: it streams data through the pipeline so all stages run concurrently; it auto-balances replicas so that every stage sustains the same throughput of about one sample per second (2× language ID, 1× tokenizer, 10× answer model, which together use the full 102 GB of VRAM); and it keeps GPU workers busy over 99% of the time after an initial warm-up period. See the [scaling concepts](https://docs.nvidia.com/nemo/curator/latest/about/concepts/scaling) for details.
+
+---
+
+## Proven at Scale: Nemotron
+
+NeMo Curator powers the data pipelines behind [NVIDIA Nemotron](https://developer.nvidia.com/nemotron) models. For example, the [Nemotron-4 pre-training dataset](https://arxiv.org/abs/2402.16819) was curated using NeMo Curator's text processing pipeline across 8+ trillion tokens of multilingual web data, applying quality filtering, deduplication, and domain classification at scale.
+
+---
+
 ## Why NeMo Curator?
 
 ### Performance at Scale
@@ -106,15 +148,15 @@ NeMo Curator leverages NVIDIA RAPIDS™ libraries such as cuDF, cuML, and cuGrap
 **Real-World Recipe:** The [Nemotron-CC curation pipeline](https://github.com/NVIDIA-NeMo/Nemotron/tree/main/src/nemotron/recipes/data_curation/nemotron-cc) uses NeMo Curator end-to-end — from Common Crawl extraction through language identification, exact/fuzzy/substring deduplication, ensemble quality classification, and LLM-based synthetic data generation — to reproduce the [Nemotron-CC datasets](https://huggingface.co/datasets/nvidia/Nemotron-CC-v2). The SDG stage is also available as an [in-repo tutorial](tutorials/synthetic/nemotron_cc/).
 
 <p align="center">
-  <img src="./docs/_images/text-benchmarks.png" alt="Performance benchmarks showing 16x speed improvement, 40% cost savings, and near-linear scaling" width="700"/>
+  <img src="./fern/assets/images/text-benchmarks.png" alt="Performance benchmarks showing 16x speed improvement, 40% cost savings, and near-linear scaling" width="700"/>
 </p>
 
 ### Quality Improvements
 
 Data curation modules measurably improve model performance. In ablation studies using a 357M-parameter GPT model trained on curated Common Crawl data:
 
 <p align="center">
-  <img src="./docs/_images/ablation.png" alt="Model accuracy improvements across curation pipeline stages" width="700"/>
+  <img src="./fern/assets/images/ablation.png" alt="Model accuracy improvements across curation pipeline stages" width="700"/>
 </p>
 
 **Results:** Progressive improvements in zero-shot downstream task performance through text cleaning, deduplication, and quality filtering stages.
@@ -136,3 +178,29 @@ Data curation modules measurably improve model performance. In ablation studies
 ## Contribute
 
 We welcome community contributions! Please refer to [CONTRIBUTING.md](https://github.com/NVIDIA/NeMo/blob/stable/CONTRIBUTING.md) for guidelines.
+
+---
+
+## Citation
+
+If you find NeMo Curator useful in your research, please cite:
+
+```bibtex
+@misc{nemo_curator,
+  title = {NeMo Curator: GPU-Accelerated Data Curation for Training AI Models},
+  author = {NVIDIA},
+  year = {2024},
+  url = {https://github.com/NVIDIA-NeMo/Curator}
+}
+```
+
+For the data curation pipeline behind Nemotron models, please also cite:
+
+```bibtex
+@article{parmar2024nemotron4,
+  title = {Nemotron-4 15B Technical Report},
+  author = {Parmar, Jupinder and Satheesh, Shrimai and others},
+  journal = {arXiv preprint arXiv:2402.16819},
+  year = {2024}
+}
+```
@@ -12,6 +12,16 @@ modality: "universal"
 
 # NeMo Curator Release Notes: {{ current_release }}
 
+## What's New in 26.04
+
+### Cosmos-Xenna 0.2.0
+
+Upgraded Cosmos-Xenna from 0.1.2 to 0.2.0 with a simplified resource model and improved GPU management:
+
+- **Simplified `Resources` API**: Removed `nvdecs`, `nvencs`, and `entire_gpu` fields. GPU allocation now uses `gpu_memory_gb` (fractional single-GPU) or `gpus` (one or more full GPUs) exclusively.
+- **Xenna-managed CUDA devices**: Xenna now manages CUDA device visibility directly, replacing the previous Ray-managed approach.
+- **Ray 2.54**: Updated Ray dependency to version 2.54 for compatibility with Cosmos-Xenna 0.2.0.
+
 ## What's New in 26.02
 
 ### Benchmarking Infrastructure
@@ -106,6 +116,13 @@ New API for tracking and analyzing pipeline execution:
 
 ## Dependency Updates
 
+### 26.04
+
+- **Cosmos-Xenna**: Updated from 0.1.2 to 0.2.0 with simplified resource model
+- **Ray**: Updated to 2.54
+
+### 26.02
+
 - **Transformers**: Pinned to 4.55.2 for stability and compatibility
 - **vLLM**: Updated to 0.14.1 with video pipeline compatibility fixes
 - **FFmpeg**: Upgraded to 8.0.1 for enhanced multimedia processing
@@ -136,6 +153,12 @@ New API for tracking and analyzing pipeline execution:
 
 ## Breaking Changes
 
+### 26.04
+
+- **`Resources` API**: The `nvdecs`, `nvencs`, and `entire_gpu` fields have been removed from `Resources`. Stages that previously used `entire_gpu=True` should use `gpus=1` instead. Stages that used `nvdecs` or `nvencs` should now request GPU resources through `gpu_memory_gb` (a fraction of a single GPU) or `gpus` (one or more full GPUs).
+
+### 26.02
+
 - **InternVideo2 Removed**: Video pipelines must use alternative embedding models (Cosmos-Embed1)
 - **ID Field Standardization**: Custom deduplication workflows may need updates to use standardized ID field names