trustyai-explainability · m-misiura · Mar 5, 2026 · Feb 17, 2026 · Feb 17, 2026 · Feb 20, 2026
diff --git a/.github/workflows/docs-build.yaml b/.github/workflows/docs-build.yaml
@@ -46,8 +46,10 @@ jobs:
               run: poetry config virtualenvs.in-project true
             - name: Install dependencies
               run: poetry install --with docs
+            - name: Validate redirect targets
+              run: make docs-check-redirects
             - name: Build documentation
-              run: make docs
+              run: make docs-strict
             - name: Delete unnecessary files
               run: |
                   sudo find _build -name .doctrees -prune -exec rm -rf {} \;

diff --git a/Makefile b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: all test tests test_watch test_coverage test_profile docs docs-serve docs-update-cards docs-check-cards docs-watch-cards pre_commit help
+.PHONY: all test tests test_watch test_coverage test_profile docs docs-strict docs-serve docs-update-cards docs-check-cards docs-watch-cards docs-check-redirects pre_commit help # command targets, never files
 
 # Default target executed when no specific target is provided to make.
 all: help
@@ -24,6 +24,9 @@ test_profile:
 docs:
 	poetry run sphinx-build -b html docs _build/docs
 
+docs-strict: # HTML docs build with warnings treated as errors (-W); used by CI
+	poetry run sphinx-build -b html -W --keep-going docs _build/docs
+
 docs-serve:
 	cd docs && poetry run sphinx-autobuild . _build/html --port 8000 --open-browser
 
@@ -36,6 +39,9 @@ docs-check-cards:
 docs-watch-cards:
 	cd docs && poetry run python scripts/update_cards/update_cards.py watch
 
+docs-check-redirects: # validate that all redirect targets exist (script is run from docs/)
+	cd docs && poetry run python scripts/validate_redirects.py
+
 pre_commit:
 	pre-commit install
 	pre-commit run --all-files
@@ -51,8 +57,10 @@ help:
 	@echo 'test_watch                   - run unit tests in watch mode'
 	@echo 'test_coverage                - run unit tests with coverage'
 	@echo 'docs                         - build docs, if you installed the docs dependencies'
+	@echo 'docs-strict                  - build docs with warnings as errors (used in CI)'
 	@echo 'docs-serve                   - serve docs locally with auto-rebuild on changes'
 	@echo 'docs-update-cards            - update grid cards in index files from linked pages'
 	@echo 'docs-check-cards             - check if grid cards are up to date (dry run)'
 	@echo 'docs-watch-cards             - watch for file changes and auto-update cards'
+	@echo 'docs-check-redirects         - validate that all redirect targets exist'
 	@echo 'pre_commit                   - run pre-commit hooks'
diff --git a/benchmark/Procfile b/benchmark/Procfile
@@ -1,7 +1,7 @@
 # Procfile
 
 # NeMo Guardrails server
-gr: poetry run nemoguardrails server --config ../examples/configs/content_safety_local --default-config-id content_safety_local --port 9000
+gr: MAIN_MODEL_ENGINE=nim MAIN_MODEL_BASE_URL=http://localhost:8000 poetry run nemoguardrails server --config ../examples/configs/content_safety_local --default-config-id content_safety_local --port 9000
 
 # Guardrails NIMs for inference. PYTHONPATH is set to the project root so absolute imports work
 app_llm: PYTHONPATH=.. python mock_llm_server/run_server.py --workers 4 --port 8000 --config-file mock_llm_server/configs/meta-llama-3.3-70b-instruct.env

diff --git a/benchmark/aiperf/configs/sweep_concurrency_benchmark.yaml b/benchmark/aiperf/configs/sweep_concurrency_benchmark.yaml
@@ -0,0 +1,37 @@
+# Benchmarking AIPerf configuration to test locally-running Guardrails
+
+# Name for this batch of benchmarks (will be part of output directory name)
+batch_name: sweep_concurrency_benchmark
+
+# Base directory where all benchmark results will be stored.
+# For sweeps, each run is written to <output_base_dir>/<batch_name>/<sweep value>
+output_base_dir: aiperf_results
+
+# Base configuration applied to all benchmark runs
+# These parameters can be overridden by sweep parameters
+base_config:
+  # Model details
+  model: meta/llama-3.3-70b-instruct
+  tokenizer: meta-llama/Llama-3.3-70B-Instruct
+  url: "http://localhost:9000"
+  endpoint: "/v1/chat/completions"
+  endpoint_type: chat
+
+  # Load generation settings.
+  warmup_request_count: 10
+  benchmark_duration: 60
+  concurrency: 0  # Overridden by the concurrency sweep below
+  request_rate_mode: "constant"
+
+  # Synthetic data generation
+  random_seed: 12345
+  prompt_input_tokens_mean: 100
+  prompt_input_tokens_stddev: 10
+  prompt_output_tokens_mean: 50
+  prompt_output_tokens_stddev: 5
+
+# Parameter sweeps. Each parameter can have multiple values
+# The script will run all combinations (Cartesian product)
+sweeps:
+  # Sweep over the following concurrency values
+  concurrency: [1, 2, 4, 8, 16, 32, 64, 128, 256]
diff --git a/benchmark/locust/README.md b/benchmark/locust/README.md
@@ -0,0 +1,111 @@
+# Locust Load Testing for NeMo Guardrails
+
+This directory contains a Locust-based load testing framework for the NeMo Guardrails OpenAI-compatible server.
+
+## Introduction
+
+The [Locust](https://locust.io/) stress-testing tool ramps up concurrent users making API calls to the `/v1/chat/completions` endpoint of an OpenAI-compatible LLM with configurable parameters.
+This complements [AIPerf](https://github.com/ai-dynamo/aiperf), which measures steady-state performance. Locust instead focuses on ramping up load, potentially beyond what a system can handle, and measuring how gracefully it degrades under higher-than-expected load.
+
+## Getting Started
+
+### Prerequisites
+
+These steps have been tested with Python 3.11.11.
+
+1. **Create a virtual environment in which to install Locust and other benchmarking tools**
+
+   ```bash
+   $ mkdir ~/env
+   $ python -m venv ~/env/benchmark_env
+   ```
+
+2. **Activate environment and install dependencies in the virtual environment**
+
+   ```bash
+   $ source ~/env/benchmark_env/bin/activate
+   (benchmark_env) $ pip install -r benchmark/requirements.txt
+   ```
+
+## Running Benchmarks
+
+The Locust benchmark uses a YAML configuration file to set load-testing parameters.
+To get started and load-test a model hosted at `http://localhost:8000`, use the following command.
+Set `headless: false` in your YAML config to use Locust's interactive web UI. Then open http://localhost:8089 to control the test and view real-time metrics.
+
+   ```bash
+   (benchmark_env) $ python -m benchmark.locust benchmark/locust/configs/local.yaml
+   ```
+
+### CLI Options
+
+The `benchmark.locust` CLI supports the following options:
+
+```bash
+python -m benchmark.locust [OPTIONS] CONFIG_FILE
+```
+
+**Arguments:**
+- `CONFIG_FILE`: Path to YAML configuration file (required)
+
+**Options:**
+- `--dry-run`: Print commands without executing them
+- `--verbose`: Enable verbose logging and debugging information
+
+## Configuration Options
+
+All configuration is done via YAML files. The following fields are supported:
+
+### Required Fields
+
+- `config_id`: Guardrails configuration ID to use
+- `model`: Model name to send in requests
+
+### Optional Fields
+
+- `host`: Server base URL (default: `http://localhost:8000`)
+- `users`: Maximum concurrent users (default: `256`, minimum: `1`)
+- `spawn_rate`: Users spawned per second (default: `10`, minimum: `0.1`)
+- `run_time`: Test duration in seconds (default: `60`, minimum: `1`)
+- `message`: Message content to send (default: `"Hello, what can you do?"`)
+- `headless`: Run without web UI (default: `true`)
+- `output_base_dir`: Directory for test results (default: `"locust_results"`)
+
+## Load Test Behavior
+
+- **Request Type**: 100% POST `/v1/chat/completions` requests
+- **Wait Time**: Zero wait time between requests (continuous hammering)
+- **Ramp-up**: Users spawn gradually at the specified `spawn_rate`
+- **Message Content**: Static message content (configurable via `message` field)
+
+## Output
+
+### Headless Mode
+
+When run in headless mode, results are saved to timestamped directories:
+
+```
+locust_results/
+└── YYYYMMDD_HHMMSS/
+    ├── report.html          # HTML report with charts
+    ├── run_metadata.json    # Test configuration metadata
+    ├── stats.csv            # Request statistics
+    ├── stats_failures.csv   # Failure statistics
+    └── stats_history.csv    # Statistics over time
+```
+
+### Web UI Mode
+
+Real-time metrics are displayed in the web interface at http://localhost:8089, including:
+- Requests per second (RPS)
+- Response time percentiles (50th, 95th, 99th)
+- Failure rate
+- Number of users
+
+## Troubleshooting
+
+If you see validation errors:
+- Ensure all required fields (`config_id`, `model`) are present in your YAML config
+- Check that the `config_id` matches a configuration on your server
+- Verify that numeric values meet minimum requirements (e.g., `users >= 1`, `spawn_rate >= 0.1`)
+- Ensure `host` starts with `http://` or `https://`
diff --git a/benchmark/locust/__init__.py b/benchmark/locust/__init__.py
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/benchmark/locust/__main__.py b/benchmark/locust/__main__.py
@@ -0,0 +1,22 @@
+#!/usr/bin/env python3
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Entry point for running the Locust load test CLI as a module: python -m benchmark.locust"""
+
+from benchmark.locust.run_locust import app
+
+if __name__ == "__main__":  # run the CLI only when executed as a script (e.g. python -m benchmark.locust), not on import
+    app()
diff --git a/benchmark/locust/configs/local.yaml b/benchmark/locust/configs/local.yaml
@@ -0,0 +1,18 @@
+# Example Locust load test configuration for NeMo Guardrails
+
+# Server details
+host: "http://localhost:8000"
+config_id: "my-guardrails-config"
+model: "meta/llama-3.3-70b-instruct"
+
+# Load test parameters
+users: 1024             # Maximum number of concurrent users
+spawn_rate: 16           # Users spawned per second
+run_time: 120            # Test duration in seconds
+
+# Request configuration
+message: "Hello, what can you do?"
+
+# Output configuration
+headless: true          # Set to true for headless mode, false for web UI
+output_base_dir: "locust_results"  # Directory for test results
diff --git a/benchmark/locust/locust_models.py b/benchmark/locust/locust_models.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Pydantic models for Locust load test configuration validation.
+"""
+
+from pydantic import BaseModel, ConfigDict, Field, field_validator
+
+
+class LocustConfig(BaseModel):
+    """Validated settings for a single Locust load-test run."""
+
+    model_config = ConfigDict(extra="forbid")  # unknown fields are rejected rather than ignored
+
+    # Server details (config_id and model are required; host has a default)
+    host: str = Field(
+        default="http://localhost:8000",
+        description="Base URL of the NeMo Guardrails server to test",
+    )
+    config_id: str = Field(..., description="Guardrails configuration ID to use")
+    model: str = Field(..., description="Model name to use in requests")
+
+    # Load test parameters (each has a default and a lower bound enforced via ge=)
+    users: int = Field(
+        default=256,
+        ge=1,
+        description="Maximum number of concurrent users",
+    )
+    spawn_rate: float = Field(
+        default=10,
+        ge=0.1,
+        description="Rate at which users are spawned (users/second)",
+    )
+    run_time: int = Field(
+        default=60,
+        ge=1,
+        description="Test duration in seconds",
+    )
+
+    # Request configuration
+    message: str = Field(
+        default="Hello, what can you do?",
+        description="Message content to send in chat completion requests",
+    )
+
+    # Output configuration
+    headless: bool = Field(
+        default=True,
+        description="Run in headless mode without web UI",
+    )
+
+    output_base_dir: str = Field(
+        default="locust_results",
+        description="Base directory for load test results",
+    )
+
+    @field_validator("host")
+    @classmethod
+    def validate_host(cls, v: str) -> str:
+        """Require an http:// or https:// prefix and return the host without trailing slashes."""
+        if not v.startswith(("http://", "https://")):
+            raise ValueError("Host must start with http:// or https://")
+        # rstrip removes every trailing "/" (not just one), so the result never ends in a slash
+        return v.rstrip("/")