diff --git a/.github/actions/test-template/action.yml b/.github/actions/test-template/action.yml
index 8727b366f4..a8220769a1 100644
--- a/.github/actions/test-template/action.yml
+++ b/.github/actions/test-template/action.yml
@@ -41,19 +41,6 @@ inputs:
     description: "Run tests on CPU only"
     required: false
     default: "false"
-  azure-client-id:
-    description: "Azure Client ID"
-    required: true
-  azure-tenant-id:
-    description: "Azure Tenant ID"
-    required: true
-  azure-subscription-id:
-    description: "Azure Subscription ID"
-    required: true
-  has-azure-credentials:
-    description: "Has Azure credentials"
-    required: false
-    default: "false"
   is_fork_pr:
     description: "Whether this is a pull request from a fork"
     required: false
@@ -77,19 +64,9 @@ inputs:
 runs:
   using: "composite"
   steps:
-    - name: Install Azure CLI
-      if: ${{ inputs.has-azure-credentials == 'true' }}
-      shell: bash
-      run: |
-        for i in 1 2 3; do
-          curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash && break
-          echo "Attempt $i failed, retrying in 10s..."
-          sleep 10
-        done
-
     - name: Install uuidgen
       shell: bash -x -e -u -o pipefail {0}
-      if: ${{ contains(inputs.runner, 'gcp') }}
+      if: ${{ contains(inputs.runner, 'aws') || contains(inputs.runner, 'gcp') }}
       run: |
         for i in 1 2 3; do
           apt-get update && apt-get install -y uuid-runtime && break
@@ -97,11 +74,6 @@ runs:
           sleep 10
         done
 
-    - name: Docker system cleanup
-      shell: bash
-      run: |
-        docker system prune -af --filter "until=48h" --force || true
-
     - name: Docker pull image
       shell: bash
       run: |
@@ -138,6 +110,7 @@ runs:
         docker run --rm -u root --runtime=nvidia --gpus all \
           --shm-size=64g \
           --env TRANSFORMERS_OFFLINE=0 \
+          --env GHA_RUNNER=${{ inputs.runner }} \
           --env HYDRA_FULL_ERROR=1 \
           --env HF_HOME=/home/TestData/nemo-rl/hf_home \
           --env HF_DATASETS_CACHE=/home/TestData/nemo-rl/hf_datasets_cache \
diff --git a/.github/workflows/_build_container.yml b/.github/workflows/_build_container.yml
new file mode 100644
index 0000000000..4b6e527e06
--- /dev/null
+++ b/.github/workflows/_build_container.yml
@@ -0,0 +1,172 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+name: Build container
+
+on:
+  workflow_call:
+    inputs:
+      build-ref:
+        required: false
+        default: ${{ github.sha }}
+        description: Ref, branch, or SHA to build.
+        type: string
+      image-name:
+        required: true
+        description: Name of the image to build and push.
+        type: string
+      build-args:
+        required: false
+        default: ""
+        description: Additional Docker build args.
+        type: string
+      build-contexts:
+        required: false
+        default: ""
+        description: Additional Docker build contexts.
+        type: string
+      dockerfile:
+        required: true
+        description: Path to the Dockerfile.
+        type: string
+      platform:
+        required: true
+        description: Docker build platform.
+        type: string
+      runner:
+        required: true
+        description: Runner to use for the build.
+        type: string
+      registry:
+        required: true
+        description: Container registry to push to.
+        type: string
+      target:
+        required: false
+        default: ""
+        description: Dockerfile stage to build.
+        type: string
+
+permissions:
+  contents: read
+  pull-requests: read
+
+defaults:
+  run:
+    shell: bash -x -e -u -o pipefail {0}
+
+jobs:
+  build:
+    runs-on: ${{ inputs.runner }}
+    env:
+      REGISTRY: ${{ inputs.registry }}
+      IMAGE_NAME: ${{ inputs.image-name }}
+      GH_REF: ${{ github.ref }}
+      RUN_ID: ${{ github.run_id }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+        with:
+          ref: ${{ inputs.build-ref }}
+          submodules: recursive
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Get recently merged PR cache refs
+        id: recent_pr_cache_refs
+        uses: actions/github-script@v8
+        env:
+          REGISTRY: ${{ inputs.registry }}
+          IMAGE_NAME: ${{ inputs.image-name }}
+        with:
+          script: |
+            const [owner, repo] = process.env.GITHUB_REPOSITORY.split("/");
+            const result = await github.graphql(`
+              query($owner: String!, $repo: String!) {
+                repository(owner: $owner, name: $repo) {
+                  pullRequests(states: MERGED, first: 100, orderBy: {field: UPDATED_AT, direction: DESC}) {
+                    nodes {
+                      number
+                    }
+                  }
+                }
+              }
+            `, { owner, repo });
+
+            const refs = result.repository.pullRequests.nodes
+              .map(({ number }) => `type=registry,ref=${process.env.REGISTRY}/${process.env.IMAGE_NAME}:${number}-buildcache,mode=max`)
+              .join("\n");
+
+            core.setOutput("cache-from", refs);
+            core.info(`Found ${result.repository.pullRequests.nodes.length} recently merged PR cache refs.`);
+
+      - name: Compute build metadata
+        id: build_meta
+        shell: bash
+        run: |
+          set -euo pipefail
+
+          PR_NUMBER=""
+          if [[ "$GH_REF" =~ refs/heads/pull-request/([0-9]+) ]]; then
+            PR_NUMBER="${BASH_REMATCH[1]}"
+          fi
+
+          TAGS=("$REGISTRY/$IMAGE_NAME:$RUN_ID")
+          if [[ "$GH_REF" == "refs/heads/main" ]]; then
+            CACHE_KEY="main"
+            TAGS+=("$REGISTRY/$IMAGE_NAME:main")
+          elif [[ -n "$PR_NUMBER" ]]; then
+            CACHE_KEY="$PR_NUMBER"
+            TAGS+=("$REGISTRY/$IMAGE_NAME:$PR_NUMBER")
+          else
+            CACHE_KEY=$(printf '%s' "${GITHUB_REF_NAME:-$RUN_ID}" | tr '/' '-' | tr -cd '[:alnum:]._-')
+            if [[ -z "$CACHE_KEY" ]]; then
+              CACHE_KEY="$RUN_ID"
+            fi
+          fi
+
+          CACHE_FROM=(
+            "type=registry,ref=$REGISTRY/$IMAGE_NAME:main-buildcache,mode=max"
+          )
+          if [[ "$CACHE_KEY" != "main" ]]; then
+            CACHE_FROM+=("type=registry,ref=$REGISTRY/$IMAGE_NAME:$CACHE_KEY-buildcache,mode=max")
+          fi
+
+          {
+            echo "tags<<EOF"
+            printf '%s\n' "${TAGS[@]}"
+            echo "EOF"
+            echo "cache-from<<EOF"
+            printf '%s\n' "${CACHE_FROM[@]}"
+            echo "EOF"
+            echo "cache-to=type=registry,ref=$REGISTRY/$IMAGE_NAME:$CACHE_KEY-buildcache,mode=max"
+          } >> "$GITHUB_OUTPUT"
+
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          file: ${{ inputs.dockerfile }}
+          push: true
+          context: .
+          platforms: ${{ inputs.platform }}
+          build-contexts: ${{ inputs.build-contexts }}
+          build-args: ${{ inputs.build-args }}
+          cache-from: |
+            ${{ steps.build_meta.outputs.cache-from }}
+            ${{ steps.recent_pr_cache_refs.outputs.cache-from }}
+          cache-to: ${{ steps.build_meta.outputs.cache-to }}
+          no-cache: false
+          tags: |
+            ${{ steps.build_meta.outputs.tags }}
+          target: ${{ inputs.target }}
diff --git a/.github/workflows/cicd-approve-test-queue.yml b/.github/workflows/cicd-approve-test-queue.yml
new file mode 100644
index 0000000000..ce9677163a
--- /dev/null
+++ b/.github/workflows/cicd-approve-test-queue.yml
@@ -0,0 +1,34 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Approve Test Queue
+
+on:
+  schedule:
+    - cron: "*/5 * * * *"
+  workflow_dispatch:
+
+jobs:
+  approve-test-queue:
+    if: github.repository == 'NVIDIA-NeMo/RL'
+    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_test_approval_queue.yml@v1.3.0
+    with:
+      workflow_name: CICD NeMo RL
+      max_concurrency_internal: ${{ fromJSON(vars.MAX_CONCURRENCY || '3') }}
+      max_concurrency_external: ${{ fromJSON(vars.MAX_CONCURRENCY_EXTERNAL || '3') }}
+    secrets:
+      PAT: ${{ secrets.PAT }}
+      NVIDIA_MANAGEMENT_ORG_PAT: ${{ secrets.NVIDIA_MANAGEMENT_ORG_PAT }}
+      SLACK_CI_CHANNEL_WEBHOOK: ${{ secrets.SLACK_GITHUB_CI_WEBHOOK }}
+      SLACK_TEAM_GROUP_ID: ${{ secrets.SLACK_TEAM_GROUP_ID }}
diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml
index e1c802b5bf..8f58db020c 100644
--- a/.github/workflows/cicd-main.yml
+++ b/.github/workflows/cicd-main.yml
@@ -42,6 +42,9 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ github.event.label.name || 'main' }}-${{ github.event_name }}
   cancel-in-progress: true
 
+env:
+  GB200_CONTAINER_REGISTRY: ${{ vars.GB200_CONTAINER_REGISTRY }}
+
 jobs:
   pre-flight:
     runs-on: ubuntu-latest
@@ -176,16 +179,27 @@ jobs:
   org-member-pre-flight:
     uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.80.1
     with:
-      default_runner_prefix: ${{ vars.DEFAULT_RUNNER_PREFIX }}
-      non_nvidia_runner_prefix: ${{ vars.NON_NVIDIA_RUNNER_PREFIX }}
-      default_test_data_path: ${{ vars.DEFAULT_TEST_DATA_PATH }}
-      non_nvidia_test_data_path: ${{ vars.NON_NVIDIA_TEST_DATA_PATH }}
-      default_registry: ${{ vars.DEFAULT_CONTAINER_REGISTRY }}
-      non_nvidia_registry: ${{ vars.NON_NVIDIA_CONTAINER_REGISTRY }}
+      default_runner_prefix: ${{ vars.DEFAULT_H100_RUNNER }}
+      non_nvidia_runner_prefix: ${{ vars.NON_NVIDIA_H100_RUNNER }}
+      default_test_data_path: ${{ vars.DEFAULT_H100_TEST_DATA_PATH }}
+      non_nvidia_test_data_path: ${{ vars.NON_NVIDIA_H100_TEST_DATA_PATH }}
+      default_registry: ${{ vars.DEFAULT_H100_CONTAINER_REGISTRY }}
+      non_nvidia_registry: ${{ vars.NON_NVIDIA_H100_CONTAINER_REGISTRY }}
       sso_users_filename: ${{ vars.SSO_USERS_FILENAME }}
     secrets:
       NVIDIA_MANAGEMENT_ORG_PAT: ${{ secrets.NVIDIA_MANAGEMENT_ORG_PAT }}
 
+  gb200-config:
+    runs-on: ubuntu-latest
+    outputs:
+      registry: ${{ steps.config.outputs.registry }}
+    steps:
+      - name: Configure GB200 registry
+        id: config
+        env:
+          GB200_REGISTRY: ${{ env.GB200_CONTAINER_REGISTRY }}
+        run: echo "registry=$GB200_REGISTRY" | tee -a "$GITHUB_OUTPUT"
+
   pr-branch-up-to-date-check:
     name: Check if PR branch is up to date
     needs: [pre-flight]
@@ -278,26 +292,109 @@ jobs:
       - name: Minimize uv cache
         run: uv cache prune --ci
 
+  cicd-wait-in-queue:
+    name: Wait in test approval queue
+    needs: [pre-flight, lint-check]
+    runs-on: ubuntu-latest
+    environment: test
+    if: >-
+      ${{
+        always() &&
+        startsWith(github.ref, 'refs/heads/pull-request/') &&
+        contains('Lfast L0 L1 L2', needs.pre-flight.outputs.test_level) &&
+        needs.pre-flight.result == 'success' &&
+        needs.lint-check.result == 'success' &&
+        !cancelled()
+      }}
+    steps:
+      - name: Approved
+        run: echo "Approved to run CI tests."
+
   sphinx-build:
-    needs: [pre-flight]
-    if: ${{ needs.pre-flight.outputs.test_level != 'none' }}
+    needs: [pre-flight, cicd-wait-in-queue]
+    if: >-
+      ${{
+        always() &&
+        needs.pre-flight.result == 'success' &&
+        needs.pre-flight.outputs.test_level != 'none' &&
+        (
+          needs.cicd-wait-in-queue.result == 'success' ||
+          needs.pre-flight.outputs.test_level == 'docs' ||
+          !startsWith(github.ref, 'refs/heads/pull-request/')
+        ) &&
+        !cancelled()
+      }}
     uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_build_docs.yml@v0.57.0
 
   build-container:
-    if: ${{ needs.pre-flight.outputs.test_level != 'none' && needs.pre-flight.outputs.image_tag == '' }}
-    needs: [pre-flight, org-member-pre-flight]
-    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_build_container.yml@v0.78.0
+    name: Build H100 container
+    if: >-
+      ${{
+        always() &&
+        needs.pre-flight.result == 'success' &&
+        needs.org-member-pre-flight.result == 'success' &&
+        needs.pre-flight.outputs.test_level != 'none' &&
+        needs.pre-flight.outputs.image_tag == '' &&
+        (
+          needs.cicd-wait-in-queue.result == 'success' ||
+          !startsWith(github.ref, 'refs/heads/pull-request/')
+        ) &&
+        !cancelled()
+      }}
+    needs: [pre-flight, org-member-pre-flight, cicd-wait-in-queue]
+    permissions:
+      contents: read
+      pull-requests: read
+    uses: ./.github/workflows/_build_container.yml
     with:
       build-ref: ${{ needs.pre-flight.outputs.test_sha }}
       image-name: ${{ vars.CI_CONTAINER_NAME }}
       dockerfile: docker/Dockerfile
-      runner: ${{ contains(needs.org-member-pre-flight.outputs.runner_prefix, 'azure') && format('{0}-gpu-x2', needs.org-member-pre-flight.outputs.runner_prefix) || contains(needs.org-member-pre-flight.outputs.runner_prefix, 'gcp') && format('{0}-gpu-x4', needs.org-member-pre-flight.outputs.runner_prefix) }}
-      image-label: ${{ vars.CI_CONTAINER_NAME }}
-      target: release
+      platform: linux/amd64
       registry: ${{ needs.org-member-pre-flight.outputs.registry }}
+      runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+      target: release
       build-contexts: |
-        nemo-rl=${{ github.run_id }}/
-        ${{ vars.UV_BUILD_CACHE == 'enabled' && format('uv-cache-seed=docker-image://{0}/{1}-uv-cache:latest', needs.org-member-pre-flight.outputs.registry, vars.CI_CONTAINER_NAME) || '' }}
+        nemo-rl=.
+        ${{ vars.UV_BUILD_CACHE == 'enabled' && format('uv-cache-seed=docker-image://{0}/{1}:uv-cache', needs.org-member-pre-flight.outputs.registry, vars.CI_CONTAINER_NAME) || '' }}
+      build-args: |
+        MAX_JOBS=4
+        NEMO_RL_COMMIT=${{ needs.pre-flight.outputs.test_sha }}
+
+  build-container-gb200:
+    name: Build GB200/GCP container
+    if: >-
+      ${{
+        always() &&
+        needs.pre-flight.result == 'success' &&
+        needs.org-member-pre-flight.result == 'success' &&
+        needs.gb200-config.result == 'success' &&
+        needs.pre-flight.outputs.test_level != 'none' &&
+        needs.pre-flight.outputs.image_tag == '' &&
+        needs.org-member-pre-flight.outputs.is_member == 'true' &&
+        contains('L1 L2', needs.pre-flight.outputs.test_level) &&
+        (
+          needs.cicd-wait-in-queue.result == 'success' ||
+          !startsWith(github.ref, 'refs/heads/pull-request/')
+        ) &&
+        !cancelled()
+      }}
+    needs: [pre-flight, org-member-pre-flight, gb200-config, cicd-wait-in-queue]
+    permissions:
+      contents: read
+      pull-requests: read
+    uses: ./.github/workflows/_build_container.yml
+    with:
+      build-ref: ${{ needs.pre-flight.outputs.test_sha }}
+      image-name: ${{ vars.CI_CONTAINER_NAME }}
+      dockerfile: docker/Dockerfile
+      platform: linux/arm64
+      registry: ${{ needs.gb200-config.outputs.registry }}
+      runner: ${{ vars.GB200_RUNNER }}
+      target: release
+      build-contexts: |
+        nemo-rl=.
+        ${{ vars.UV_BUILD_CACHE == 'enabled' && format('uv-cache-seed=docker-image://{0}/{1}:uv-cache', needs.gb200-config.outputs.registry, vars.CI_CONTAINER_NAME) || '' }}
       build-args: |
         MAX_JOBS=4
         NEMO_RL_COMMIT=${{ needs.pre-flight.outputs.test_sha }}
@@ -308,10 +405,10 @@ jobs:
     if: >-
       ${{
         github.ref == 'refs/heads/main' &&
+        vars.UV_BUILD_CACHE == 'enabled' &&
         needs.build-container.result == 'success'
       }}
-    runs-on: ${{ format('{0}-gpu-x2', needs.org-member-pre-flight.outputs.runner_prefix) }}
-    environment: nemo-ci
+    runs-on: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
     env:
       REGISTRY: ${{ needs.org-member-pre-flight.outputs.registry }}
       IMAGE_NAME: ${{ vars.CI_CONTAINER_NAME }}
@@ -320,7 +417,40 @@ jobs:
         run: |
           set -euo pipefail
           SRC="${REGISTRY}/${IMAGE_NAME}:${{ github.run_id }}"
-          DST="${REGISTRY}/${IMAGE_NAME}-uv-cache:latest"
+          DST="${REGISTRY}/${IMAGE_NAME}:uv-cache"
+
+          docker pull "${SRC}"
+          CID=$(docker create "${SRC}" true)
+          mkdir -p /tmp/uv-cache
+          docker cp "${CID}:/root/.cache/uv/." /tmp/uv-cache/
+          docker rm "${CID}"
+
+          printf 'FROM scratch\nCOPY uv-cache/ /\n' > /tmp/Dockerfile.uv-cache
+          docker build -t "${DST}" -f /tmp/Dockerfile.uv-cache /tmp
+          docker push "${DST}"
+
+          docker rmi "${SRC}" "${DST}" 2>/dev/null || true
+          rm -rf /tmp/uv-cache /tmp/Dockerfile.uv-cache
+
+  update-uv-cache-gb200:
+    name: Update GB200 uv build cache
+    needs: [build-container-gb200, gb200-config]
+    if: >-
+      ${{
+        github.ref == 'refs/heads/main' &&
+        vars.UV_BUILD_CACHE == 'enabled' &&
+        needs.build-container-gb200.result == 'success'
+      }}
+    runs-on: ${{ vars.GB200_RUNNER }}
+    env:
+      REGISTRY: ${{ needs.gb200-config.outputs.registry }}
+      IMAGE_NAME: ${{ vars.CI_CONTAINER_NAME }}
+    steps:
+      - name: Extract and push uv cache image
+        run: |
+          set -euo pipefail
+          SRC="${REGISTRY}/${IMAGE_NAME}:${{ github.run_id }}"
+          DST="${REGISTRY}/${IMAGE_NAME}:uv-cache"
 
           docker pull "${SRC}"
           CID=$(docker create "${SRC}" true)
@@ -341,27 +471,32 @@ jobs:
       matrix:
         include:
           - script: Docs_Tests
-            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}-gpu-x2
-    needs: [pre-flight, build-container, org-member-pre-flight]
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+    needs: [pre-flight, build-container, org-member-pre-flight, cicd-wait-in-queue]
     if: >-
       ${{
         (
           always() &&
           contains('docs Lfast L0 L1 L2', needs.pre-flight.outputs.test_level) &&
           needs.pre-flight.result == 'success' &&
+          needs.org-member-pre-flight.result == 'success' &&
+          (
+            needs.cicd-wait-in-queue.result == 'success' ||
+            needs.pre-flight.outputs.test_level == 'docs' ||
+            !startsWith(github.ref, 'refs/heads/pull-request/')
+          ) &&
           (needs.build-container.result == 'success' || needs.build-container.result == 'skipped')
         ) && !cancelled()
       }}
     runs-on: ${{ matrix.runner }}
     name: ${{ matrix.is_optional && 'PLEASEFIXME_' || '' }}${{ matrix.script }}
-    environment: nemo-ci
     steps:
       - name: Checkout
         uses: actions/checkout@v6
       - name: main
         uses: ./.github/actions/test-template
         with:
-          runner: ${{ runner.name }}
+          runner: ${{ matrix.runner }}
           registry: ${{ needs.org-member-pre-flight.outputs.registry }}
           image: ${{ vars.CI_CONTAINER_NAME }}
           image-tag: ${{ needs.pre-flight.outputs.image_tag }}
@@ -373,21 +508,65 @@ jobs:
   cicd-unit-tests:
     strategy:
       fail-fast: false
+      max-parallel: 16
       matrix:
         include:
-          - script: L0_Unit_Tests_Generation
-            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}-gpu-x2
-          - script: L0_Unit_Tests_Policy
-            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}-gpu-x2
+          - script: L0_Unit_Tests_Vllm_1
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L0_Unit_Tests_Vllm_2
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L0_Unit_Tests_Vllm_3
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L0_Unit_Tests_Sglang
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L0_Unit_Tests_Mcore
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L0_Unit_Tests_Mcore_Policy_1
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L0_Unit_Tests_Mcore_Policy_2
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L0_Unit_Tests_Mcore_Policy_3
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L0_Unit_Tests_Automodel
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L0_Unit_Tests_Automodel_Policy_1
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L0_Unit_Tests_Automodel_Policy_2
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L0_Unit_Tests_Automodel_Policy_3
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L0_Unit_Tests_Models_1
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L0_Unit_Tests_Models_2
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L0_Unit_Tests_Models_3
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L0_Unit_Tests_Models_4
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L0_Unit_Tests_Environments
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L0_Unit_Tests_Nemo_Gym
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L0_Unit_Tests_Algorithms
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L0_Unit_Tests_Data
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L0_Unit_Tests_Distributed
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
           - script: L0_Unit_Tests_Other
-            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}-gpu-x2
-    needs: [pre-flight, build-container, cicd-doc-tests, org-member-pre-flight]
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+    needs: [pre-flight, build-container, cicd-doc-tests, org-member-pre-flight, cicd-wait-in-queue]
     if: >-
       ${{
         (
           always() &&
           contains('L0 L1 L2 Lfast', needs.pre-flight.outputs.test_level) &&
           needs.pre-flight.result == 'success' &&
+          needs.org-member-pre-flight.result == 'success' &&
+          (
+            needs.cicd-wait-in-queue.result == 'success' ||
+            !startsWith(github.ref, 'refs/heads/pull-request/')
+          ) &&
           (needs.build-container.result == 'success' || needs.build-container.result == 'skipped') &&
           (needs.cicd-doc-tests.result == 'success' || needs.cicd-doc-tests.result == 'skipped')
         ) && !cancelled()
@@ -404,7 +583,7 @@ jobs:
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         with:
-          runner: ${{ runner.name }}
+          runner: ${{ matrix.runner }}
           script: ${{ matrix.script }}
           registry: ${{ needs.org-member-pre-flight.outputs.registry }}
           test_data_path: ${{ needs.org-member-pre-flight.outputs.test_data_path }}
@@ -414,18 +593,148 @@ jobs:
           cpu-only: ${{ matrix.cpu-only || false }}
           test-commit-sha: ${{ needs.pre-flight.outputs.test_sha }}
 
+  unit-test-script-check:
+    name: Check unit test script coverage
+    needs: [pre-flight, cicd-wait-in-queue]
+    if: >-
+      ${{
+        always() &&
+        contains('L0 L1 L2 Lfast', needs.pre-flight.outputs.test_level) &&
+        needs.pre-flight.result == 'success' &&
+        (
+          needs.cicd-wait-in-queue.result == 'success' ||
+          !startsWith(github.ref, 'refs/heads/pull-request/')
+        ) &&
+        !cancelled()
+      }}
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6
+        with:
+          ref: ${{ needs.pre-flight.outputs.test_sha }}
+
+      - name: Verify L0 unit scripts are in the workflow
+        run: |
+          set -euo pipefail
+
+          expected=$(mktemp)
+          configured=$(mktemp)
+
+          find tests/unit -maxdepth 1 -type f -name 'L0_Unit*.sh' \
+            -exec basename {} .sh \; | sort -u > "$expected"
+
+          {
+            grep -E '^[[:space:]]*-[[:space:]]*script:[[:space:]]*L0_Unit' .github/workflows/cicd-main.yml || true
+          } | sed -E 's/^[[:space:]]*-[[:space:]]*script:[[:space:]]*//' | sort -u > "$configured"
+
+          missing=$(comm -23 "$expected" "$configured")
+          if [[ -n "$missing" ]]; then
+            echo "The following tests/unit/L0_Unit*.sh scripts are missing from .github/workflows/cicd-main.yml:"
+            printf '%s\n' "$missing"
+            exit 1
+          fi
+
+          echo "All L0 unit scripts are included in .github/workflows/cicd-main.yml."
+
+  functional-test-script-check:
+    name: Check functional test script coverage
+    needs: [pre-flight, cicd-wait-in-queue]
+    if: >-
+      ${{
+        always() &&
+        contains('L1 L2 Lfast', needs.pre-flight.outputs.test_level) &&
+        needs.pre-flight.result == 'success' &&
+        (
+          needs.cicd-wait-in-queue.result == 'success' ||
+          !startsWith(github.ref, 'refs/heads/pull-request/')
+        ) &&
+        !cancelled()
+      }}
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6
+        with:
+          ref: ${{ needs.pre-flight.outputs.test_sha }}
+
+      - name: Verify L1 functional scripts are in the workflow
+        run: |
+          set -euo pipefail
+
+          expected=$(mktemp)
+          configured=$(mktemp)
+
+          find tests/functional -maxdepth 1 -type f -name 'L1_Functional*.sh' \
+            -exec basename {} .sh \; | sort -u > "$expected"
+
+          {
+            grep -E '^[[:space:]]*-[[:space:]]*script:[[:space:]]*L1_Functional' .github/workflows/cicd-main.yml || true
+          } | sed -E 's/^[[:space:]]*-[[:space:]]*script:[[:space:]]*//' | sort -u > "$configured"
+
+          missing=$(comm -23 "$expected" "$configured")
+          if [[ -n "$missing" ]]; then
+            echo "The following tests/functional/L1_Functional*.sh scripts are missing from .github/workflows/cicd-main.yml:"
+            printf '%s\n' "$missing"
+            exit 1
+          fi
+
+          echo "All L1 functional scripts are included in .github/workflows/cicd-main.yml."
+
   cicd-functional-tests:
     strategy:
       fail-fast: false
+      max-parallel: 16
       matrix:
         include:
-          - script: L1_Functional_Tests_GPU
-            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}-gpu-x2
-    needs: [pre-flight, build-container, cicd-unit-tests, org-member-pre-flight]
+          - script: L1_Functional_Tests_Megatron_1
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L1_Functional_Tests_Megatron_2
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L1_Functional_Tests_Megatron_3
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L1_Functional_Tests_AutoModel
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L1_Functional_Tests_SGLang
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L1_Functional_Tests_Gym
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L1_Functional_Tests_GRPO_1
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L1_Functional_Tests_GRPO_2
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L1_Functional_Tests_GRPO_3
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L1_Functional_Tests_SFT
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L1_Functional_Tests_Eval
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L1_Functional_Tests_Other_1
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L1_Functional_Tests_Other_2
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+    needs: [pre-flight, build-container, cicd-unit-tests, functional-test-script-check, org-member-pre-flight, cicd-wait-in-queue]
     runs-on: ${{ matrix.runner }}
-    if: ${{ contains('L1 L2', needs.pre-flight.outputs.test_level) }}
+    if: >-
+      ${{
+        always() &&
+        contains('L1 L2', needs.pre-flight.outputs.test_level) &&
+        needs.pre-flight.result == 'success' &&
+        needs.org-member-pre-flight.result == 'success' &&
+        (
+          needs.cicd-wait-in-queue.result == 'success' ||
+          !startsWith(github.ref, 'refs/heads/pull-request/')
+        ) &&
+        (needs.build-container.result == 'success' || needs.build-container.result == 'skipped') &&
+        needs.cicd-unit-tests.result == 'success' &&
+        needs.functional-test-script-check.result == 'success' &&
+        !cancelled()
+      }}
     name: ${{ matrix.is_optional && 'PLEASEFIXME_' || '' }}${{ matrix.script }}
-    environment: nemo-ci
     steps:
       - name: Checkout
         uses: actions/checkout@v6
@@ -434,25 +743,125 @@ jobs:
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         with:
-          runner: ${{ runner.name }}
+          runner: ${{ matrix.runner }}
           registry: ${{ needs.org-member-pre-flight.outputs.registry }}
           image: ${{ vars.CI_CONTAINER_NAME }}
           test_data_path: ${{ needs.org-member-pre-flight.outputs.test_data_path }}
           script: ${{ matrix.script }}
           test-commit-sha: ${{ needs.pre-flight.outputs.test_sha }}
 
+  cicd-functional-tests-gb200:
+    strategy:
+      fail-fast: false
+      max-parallel: 16
+      matrix:
+        include:
+          - script: L1_Functional_Tests_Megatron_1
+            runner: ${{ vars.GB200_RUNNER }}
+          - script: L1_Functional_Tests_Megatron_2
+            runner: ${{ vars.GB200_RUNNER }}
+          - script: L1_Functional_Tests_Megatron_3
+            runner: ${{ vars.GB200_RUNNER }}
+          - script: L1_Functional_Tests_AutoModel
+            runner: ${{ vars.GB200_RUNNER }}
+          - script: L1_Functional_Tests_SGLang
+            runner: ${{ vars.GB200_RUNNER }}
+          - script: L1_Functional_Tests_Gym
+            runner: ${{ vars.GB200_RUNNER }}
+          - script: L1_Functional_Tests_GRPO_1
+            runner: ${{ vars.GB200_RUNNER }}
+          - script: L1_Functional_Tests_GRPO_2
+            runner: ${{ vars.GB200_RUNNER }}
+          - script: L1_Functional_Tests_GRPO_3
+            runner: ${{ vars.GB200_RUNNER }}
+          - script: L1_Functional_Tests_SFT
+            runner: ${{ vars.GB200_RUNNER }}
+          - script: L1_Functional_Tests_Eval
+            runner: ${{ vars.GB200_RUNNER }}
+          - script: L1_Functional_Tests_Other_1
+            runner: ${{ vars.GB200_RUNNER }}
+          - script: L1_Functional_Tests_Other_2
+            runner: ${{ vars.GB200_RUNNER }}
+    needs: [pre-flight, build-container-gb200, cicd-unit-tests, functional-test-script-check, org-member-pre-flight, gb200-config, cicd-wait-in-queue]
+    runs-on: ${{ matrix.runner }}
+    if: >-
+      ${{
+        always() &&
+        contains('L1 L2', needs.pre-flight.outputs.test_level) &&
+        needs.org-member-pre-flight.outputs.is_member == 'true' &&
+        needs.pre-flight.result == 'success' &&
+        needs.org-member-pre-flight.result == 'success' &&
+        (
+          needs.cicd-wait-in-queue.result == 'success' ||
+          !startsWith(github.ref, 'refs/heads/pull-request/')
+        ) &&
+        (needs.build-container-gb200.result == 'success' || needs.build-container-gb200.result == 'skipped') &&
+        needs.cicd-unit-tests.result == 'success' &&
+        needs.functional-test-script-check.result == 'success' &&
+        !cancelled()
+      }}
+    name: gb200_${{ matrix.is_optional && 'PLEASEFIXME_' || '' }}${{ matrix.script }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6
+      - name: main
+        uses: ./.github/actions/test-template
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        with:
+          runner: ${{ matrix.runner }}
+          registry: ${{ needs.gb200-config.outputs.registry }}
+          image: ${{ vars.CI_CONTAINER_NAME }}
+          test_data_path: ${{ needs.org-member-pre-flight.outputs.test_data_path }}
+          image-tag: ${{ needs.pre-flight.outputs.image_tag }}
+          script: ${{ matrix.script }}
+          test-commit-sha: ${{ needs.pre-flight.outputs.test_sha }}
+
   cicd-fast-functional-tests:
     strategy:
       fail-fast: false
       matrix:
         include:
-          - script: L1_Functional_Tests_GPU
-            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}-gpu-x2
-    needs: [pre-flight, org-member-pre-flight]
-    if: ${{ contains('Lfast', needs.pre-flight.outputs.test_level) }}
+          - script: L1_Functional_Tests_Megatron_1
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L1_Functional_Tests_Megatron_2
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L1_Functional_Tests_Megatron_3
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L1_Functional_Tests_AutoModel
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L1_Functional_Tests_Gym
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L1_Functional_Tests_GRPO_1
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L1_Functional_Tests_GRPO_2
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L1_Functional_Tests_GRPO_3
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L1_Functional_Tests_SFT
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L1_Functional_Tests_Eval
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L1_Functional_Tests_Other_1
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+          - script: L1_Functional_Tests_Other_2
+            runner: ${{ needs.org-member-pre-flight.outputs.runner_prefix }}
+    needs: [pre-flight, functional-test-script-check, org-member-pre-flight, cicd-wait-in-queue]
+    if: >-
+      ${{
+        always() &&
+        contains('Lfast', needs.pre-flight.outputs.test_level) &&
+        needs.pre-flight.result == 'success' &&
+        needs.org-member-pre-flight.result == 'success' &&
+        (
+          needs.cicd-wait-in-queue.result == 'success' ||
+          !startsWith(github.ref, 'refs/heads/pull-request/')
+        ) &&
+        needs.functional-test-script-check.result == 'success' &&
+        !cancelled()
+      }}
     runs-on: ${{ matrix.runner }}
     name: fast_${{ matrix.script }}
-    environment: nemo-ci
     steps:
       - name: Checkout
         uses: actions/checkout@v6
@@ -461,7 +870,7 @@ jobs:
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         with:
-          runner: ${{ runner.name }}
+          runner: ${{ matrix.runner }}
           script: ${{ matrix.script }}
           image-tag: ${{ needs.pre-flight.outputs.image_tag }}
           registry: ${{ needs.org-member-pre-flight.outputs.registry }}
@@ -475,13 +884,19 @@ jobs:
     runs-on: ubuntu-latest
     needs:
       - pre-flight
+      - org-member-pre-flight
       - pr-branch-up-to-date-check
       - lint-check
+      - cicd-wait-in-queue
       - sphinx-build
       - build-container
+      - build-container-gb200
       - cicd-doc-tests
       - cicd-unit-tests
+      - unit-test-script-check
+      - functional-test-script-check
       - cicd-functional-tests
+      - cicd-functional-tests-gb200
       - cicd-fast-functional-tests
     steps:
       - name: main
@@ -491,24 +906,48 @@ jobs:
           ALL_SUCCESS: >-
             ${{
               needs.lint-check.result == 'success' &&
+              (needs.cicd-wait-in-queue.result == 'success' || needs.cicd-wait-in-queue.result == 'skipped') &&
               (needs.pr-branch-up-to-date-check.result == 'success' || needs.pr-branch-up-to-date-check.result == 'skipped') &&
               (
                 needs.pre-flight.outputs.test_level != 'none' &&
                 needs.sphinx-build.result == 'success' &&
                 (needs.build-container.result == 'success' || needs.build-container.result == 'skipped') &&
+                (needs.build-container-gb200.result == 'success' || needs.build-container-gb200.result == 'skipped') &&
                 (
                   (
                     (needs.cicd-doc-tests.result == 'success' || needs.cicd-doc-tests.result == 'skipped') &&
-                    (needs.cicd-unit-tests.result == 'skipped' || needs.cicd-unit-tests.result == 'success') &&
-                    (needs.cicd-functional-tests.result == 'skipped' || needs.cicd-functional-tests.result == 'success') &&
-                    (needs.cicd-fast-functional-tests.result == 'skipped' || needs.cicd-fast-functional-tests.result == 'success')
+                    (
+                      !contains('L0 L1 L2 Lfast', needs.pre-flight.outputs.test_level) ||
+                      needs.cicd-unit-tests.result == 'success'
+                    ) &&
+                    (
+                      !contains('L0 L1 L2 Lfast', needs.pre-flight.outputs.test_level) ||
+                      needs.unit-test-script-check.result == 'success'
+                    ) &&
+                    (
+                      !contains('L1 L2 Lfast', needs.pre-flight.outputs.test_level) ||
+                      needs.functional-test-script-check.result == 'success'
+                    ) &&
+                    (
+                      !contains('L1 L2', needs.pre-flight.outputs.test_level) ||
+                      needs.cicd-functional-tests.result == 'success'
+                    ) &&
+                    (
+                      needs.org-member-pre-flight.outputs.is_member != 'true' ||
+                      !contains('L1 L2', needs.pre-flight.outputs.test_level) ||
+                      needs.cicd-functional-tests-gb200.result == 'success'
+                    ) &&
+                    (
+                      !contains('Lfast', needs.pre-flight.outputs.test_level) ||
+                      needs.cicd-fast-functional-tests.result == 'success'
+                    )
                   )
                 )
               )
             }}
 
 
-          CI_SKIP: ${{ needs.pre-flight.outputs.has_cicd_skip_label }}
+          CI_SKIP: ${{ needs.pre-flight.outputs.has_skip_cicd }}
           TEST_LEVEL: ${{ needs.pre-flight.outputs.test_level }}
         run: |
           SUMMARY=$(echo $JOB_RESULTS | jq 'to_entries[] | .key + ": " + .value.result' | tr -d '"')
diff --git a/pyproject.toml b/pyproject.toml
index 73c2b2e1b9..e75ce7e279 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -220,6 +220,7 @@ test = [
   "pytest-cov",
   "pytest-asyncio",
   "pytest-testmon",
+  "pytest-shard",
 ]
 
 [tool.uv.sources]
diff --git a/tests/functional/L1_Functional_Tests_AutoModel.sh b/tests/functional/L1_Functional_Tests_AutoModel.sh
new file mode 100644
index 0000000000..9ea77645e3
--- /dev/null
+++ b/tests/functional/L1_Functional_Tests_AutoModel.sh
@@ -0,0 +1,45 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+set -xeuo pipefail # Exit immediately if a command exits with a non-zero status
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..)
+
+cd ${PROJECT_ROOT}
+
+# run_test [fast] <command...>
+# - "run_test fast <cmd>" = always runs (both fast and full modes)
+# - "run_test <cmd>"      = only runs in full mode; skipped when FAST=1
+run_test() {
+    if [[ "$1" == "fast" ]]; then
+        shift
+        time "$@"
+    elif [[ "${FAST:-0}" == "1" ]]; then
+        echo "FAST: Skipping: $*"
+    else
+        time "$@"
+    fi
+}
+
+run_test      uv run --no-sync bash ./tests/functional/dpo_automodel_lora.sh
+run_test      uv run --no-sync bash ./tests/functional/grpo_automodel_lora.sh
+run_test      uv run --no-sync bash ./tests/functional/grpo_automodel_lora_async.sh
+run_test      uv run --no-sync bash ./tests/functional/grpo_automodel_lora_non_colocated.sh
+run_test      uv run --no-sync bash ./tests/functional/sft_automodel_lora.sh
+run_test      uv run --no-sync bash ./tests/functional/test_automodel_extra_installed_correctly.sh
+
+cd ${PROJECT_ROOT}/tests
+coverage combine .coverage*
diff --git a/tests/functional/L1_Functional_Tests_Eval.sh b/tests/functional/L1_Functional_Tests_Eval.sh
new file mode 100644
index 0000000000..3d6a3b63e2
--- /dev/null
+++ b/tests/functional/L1_Functional_Tests_Eval.sh
@@ -0,0 +1,42 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+set -xeuo pipefail # Exit immediately if a command exits with a non-zero status
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..)
+
+cd ${PROJECT_ROOT}
+
+# run_test [fast] <command...>
+# - "run_test fast <cmd>" = always runs (both fast and full modes)
+# - "run_test <cmd>"      = only runs in full mode; skipped when FAST=1
+run_test() {
+    if [[ "$1" == "fast" ]]; then
+        shift
+        time "$@"
+    elif [[ "${FAST:-0}" == "1" ]]; then
+        echo "FAST: Skipping: $*"
+    else
+        time "$@"
+    fi
+}
+
+run_test      uv run --no-sync bash ./tests/functional/eval.sh
+run_test      uv run --no-sync bash ./tests/functional/eval_async.sh
+run_test fast uv run --no-sync bash ./tests/functional/eval_audio.sh
+
+cd ${PROJECT_ROOT}/tests
+coverage combine .coverage*
diff --git a/tests/functional/L1_Functional_Tests_GPU.sh b/tests/functional/L1_Functional_Tests_GPU.sh
deleted file mode 100644
index 7f3dba5053..0000000000
--- a/tests/functional/L1_Functional_Tests_GPU.sh
+++ /dev/null
@@ -1,99 +0,0 @@
-# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/bin/bash
-set -xeuo pipefail # Exit immediately if a command exits with a non-zero status
-
-SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..)
-
-cd ${PROJECT_ROOT}
-
-# run_test [fast] <command...>
-# - "run_test fast <cmd>" = always runs (both fast and full modes)
-# - "run_test <cmd>"      = only runs in full mode; skipped when FAST=1
-run_test() {
-    if [[ "$1" == "fast" ]]; then
-        shift
-        time "$@"
-    elif [[ "${FAST:-0}" == "1" ]]; then
-        echo "FAST: Skipping: $*"
-    else
-        time "$@"
-    fi
-}
-
-# This test is intentionally not run with uv run --no-sync to verify that the frozen environment is working correctly.
-run_test      bash ./tests/functional/grpo_frozen_env.sh
-run_test      bash ./tests/functional/test_frozen_env.sh
-
-run_test fast uv run --no-sync bash ./tests/functional/audio_grpo_megatron.sh
-run_test fast uv run --no-sync bash ./tests/functional/distillation.sh
-run_test      uv run --no-sync bash ./tests/functional/distillation_megatron.sh
-run_test fast uv run --no-sync bash ./tests/functional/dpo.sh
-run_test      uv run --no-sync bash ./tests/functional/dpo_automodel_lora.sh
-run_test fast uv run --no-sync bash ./tests/functional/dpo_megatron_lora.sh
-run_test      uv run --no-sync bash ./tests/functional/dpo_megatron.sh
-run_test      uv run --no-sync bash ./tests/functional/eval.sh
-run_test      uv run --no-sync bash ./tests/functional/eval_async.sh
-run_test fast uv run --no-sync bash ./tests/functional/eval_audio.sh
-run_test fast uv run --no-sync bash ./tests/functional/gdpo.sh
-run_test fast uv run --no-sync bash ./tests/functional/gdpo_async_grpo.sh
-run_test fast uv run --no-sync bash ./tests/functional/grpo.sh
-run_test fast uv run --no-sync bash ./tests/functional/grpo_dp_simple.sh
-run_test fast uv run --no-sync bash ./tests/functional/grpo_dp_mooncake.sh
-run_test fast uv run --no-sync bash ./tests/functional/grpo_async_gym.sh
-run_test      uv run --no-sync bash ./tests/functional/grpo_automodel_lora.sh
-run_test      uv run --no-sync bash ./tests/functional/grpo_automodel_lora_async.sh
-run_test      uv run --no-sync bash ./tests/functional/grpo_automodel_lora_non_colocated.sh
-run_test fast uv run --no-sync bash ./tests/functional/grpo_fsdp2.sh
-run_test      uv run --no-sync bash ./tests/functional/grpo_megatron.sh
-run_test      uv run --no-sync bash ./tests/functional/grpo_megatron_mbridge_restore.sh
-run_test fast uv run --no-sync bash ./tests/functional/grpo_megatron_eagle3_online.sh
-run_test      uv run --no-sync bash ./tests/functional/grpo_megatron_generation.sh
-run_test fast uv run --no-sync bash ./tests/functional/grpo_megatron_lora.sh
-run_test fast uv run --no-sync bash ./tests/functional/grpo_megatron_lora_async.sh
-run_test      uv run --no-sync bash ./tests/functional/grpo_multiple_dataloaders.sh
-run_test      uv run --no-sync bash ./tests/functional/grpo_multiturn.sh
-run_test      uv run --no-sync bash ./tests/functional/grpo_non_colocated.sh
-run_test      uv run --no-sync bash ./tests/functional/grpo_rm_env.sh
-run_test      uv run --no-sync bash ./tests/functional/grpo_sglang.sh
-run_test fast uv run --no-sync bash ./tests/functional/grpo_topp_topk.sh
-run_test      uv run --no-sync bash ./tests/functional/prorlv2.sh
-run_test      uv run --no-sync bash ./tests/functional/qa_distillation_megatron.sh
-run_test      uv run --no-sync bash ./tests/functional/rm.sh
-run_test fast uv run --no-sync bash ./tests/functional/sft.sh
-run_test      uv run --no-sync bash ./tests/functional/sft_automodel_lora.sh
-run_test      uv run --no-sync bash ./tests/functional/sft_avlm.sh
-run_test      uv run --no-sync bash ./tests/functional/sft_megatron.sh
-run_test      uv run --no-sync bash ./tests/functional/sft_megatron_lora.sh
-run_test      uv run --no-sync bash ./tests/functional/sft_resume_diamond.sh
-run_test      uv run --no-sync bash ./tests/functional/test_automodel_extra_installed_correctly.sh
-run_test fast uv run --no-sync bash ./tests/functional/test_converters.sh
-run_test      uv run --no-sync bash ./tests/functional/test_decode_vs_prefill.sh
-run_test      uv run --no-sync bash ./tests/functional/test_mcore_extra_installed_correctly.sh
-run_test      uv run --no-sync bash ./tests/functional/vlm_grpo.sh
-
-# Research functional tests (self-discovery)
-if [[ "${FAST:-0}" != "1" ]]; then
-    for test_script in research/*/tests/functional/*.sh; do
-        project_dir=$(echo $test_script | cut -d/ -f1-2)
-        pushd $project_dir
-        time uv run --no-sync bash $(echo $test_script | cut -d/ -f3-)
-        popd
-    done
-fi
-
-cd ${PROJECT_ROOT}/tests
-coverage combine .coverage*
diff --git a/tests/functional/L1_Functional_Tests_GRPO_1.sh b/tests/functional/L1_Functional_Tests_GRPO_1.sh
new file mode 100644
index 0000000000..f2a63930bd
--- /dev/null
+++ b/tests/functional/L1_Functional_Tests_GRPO_1.sh
@@ -0,0 +1,47 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+set -xeuo pipefail
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..)
+
+cd ${PROJECT_ROOT}
+
+# run_test [fast] <command...>
+# - "run_test fast <cmd>" = always runs (both fast and full modes)
+# - "run_test <cmd>"      = only runs in full mode; skipped when FAST=1
+run_test() {
+    if [[ "$1" == "fast" ]]; then
+        shift
+        time "$@"
+    elif [[ "${FAST:-0}" == "1" ]]; then
+        echo "FAST: Skipping: $*"
+    else
+        time "$@"
+    fi
+}
+
+# This test is intentionally not run with uv run --no-sync to verify that the frozen environment is working correctly.
+run_test      bash ./tests/functional/grpo_frozen_env.sh
+
+run_test fast uv run --no-sync bash ./tests/functional/gdpo.sh
+run_test fast uv run --no-sync bash ./tests/functional/grpo.sh
+run_test      uv run --no-sync bash ./tests/functional/grpo_multiple_dataloaders.sh
+run_test fast uv run --no-sync bash ./tests/functional/grpo_dp_simple.sh
+run_test fast uv run --no-sync bash ./tests/functional/grpo_dp_mooncake.sh
+
+cd ${PROJECT_ROOT}/tests
+coverage combine .coverage*
diff --git a/tests/functional/L1_Functional_Tests_GRPO_2.sh b/tests/functional/L1_Functional_Tests_GRPO_2.sh
new file mode 100644
index 0000000000..b1d8c26d26
--- /dev/null
+++ b/tests/functional/L1_Functional_Tests_GRPO_2.sh
@@ -0,0 +1,43 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+set -xeuo pipefail
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..)
+
+cd ${PROJECT_ROOT}
+
+# run_test [fast] <command...>
+# - "run_test fast <cmd>" = always runs (both fast and full modes)
+# - "run_test <cmd>"      = only runs in full mode; skipped when FAST=1
+run_test() {
+    if [[ "$1" == "fast" ]]; then
+        shift
+        time "$@"
+    elif [[ "${FAST:-0}" == "1" ]]; then
+        echo "FAST: Skipping: $*"
+    else
+        time "$@"
+    fi
+}
+
+run_test fast uv run --no-sync bash ./tests/functional/gdpo_async_grpo.sh
+run_test fast uv run --no-sync bash ./tests/functional/grpo_fsdp2.sh
+run_test      uv run --no-sync bash ./tests/functional/grpo_multiturn.sh
+run_test      uv run --no-sync bash ./tests/functional/grpo_non_colocated.sh
+
+cd ${PROJECT_ROOT}/tests
+coverage combine .coverage*
diff --git a/tests/functional/L1_Functional_Tests_GRPO_3.sh b/tests/functional/L1_Functional_Tests_GRPO_3.sh
new file mode 100644
index 0000000000..e64b56cefe
--- /dev/null
+++ b/tests/functional/L1_Functional_Tests_GRPO_3.sh
@@ -0,0 +1,42 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+set -xeuo pipefail
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..)
+
+cd ${PROJECT_ROOT}
+
+# run_test [fast] <command...>
+# - "run_test fast <cmd>" = always runs (both fast and full modes)
+# - "run_test <cmd>"      = only runs in full mode; skipped when FAST=1
+run_test() {
+    if [[ "$1" == "fast" ]]; then
+        shift
+        time "$@"
+    elif [[ "${FAST:-0}" == "1" ]]; then
+        echo "FAST: Skipping: $*"
+    else
+        time "$@"
+    fi
+}
+
+run_test      uv run --no-sync bash ./tests/functional/grpo_rm_env.sh
+run_test fast uv run --no-sync bash ./tests/functional/grpo_topp_topk.sh
+run_test      uv run --no-sync bash ./tests/functional/vlm_grpo.sh
+
+cd ${PROJECT_ROOT}/tests
+coverage combine .coverage*
diff --git a/tests/functional/L1_Functional_Tests_Gym.sh b/tests/functional/L1_Functional_Tests_Gym.sh
new file mode 100644
index 0000000000..33dc450d7b
--- /dev/null
+++ b/tests/functional/L1_Functional_Tests_Gym.sh
@@ -0,0 +1,40 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+set -xeuo pipefail # Exit immediately if a command exits with a non-zero status
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..)
+
+cd ${PROJECT_ROOT}
+
+# run_test [fast] <command...>
+# - "run_test fast <cmd>" = always runs (both fast and full modes)
+# - "run_test <cmd>"      = only runs in full mode; skipped when FAST=1
+run_test() {
+    if [[ "$1" == "fast" ]]; then
+        shift
+        time "$@"
+    elif [[ "${FAST:-0}" == "1" ]]; then
+        echo "FAST: Skipping: $*"
+    else
+        time "$@"
+    fi
+}
+
+run_test fast uv run --no-sync bash ./tests/functional/grpo_async_gym.sh
+
+cd ${PROJECT_ROOT}/tests
+coverage combine .coverage*
diff --git a/tests/functional/L1_Functional_Tests_Megatron_1.sh b/tests/functional/L1_Functional_Tests_Megatron_1.sh
new file mode 100644
index 0000000000..dd5a0640f6
--- /dev/null
+++ b/tests/functional/L1_Functional_Tests_Megatron_1.sh
@@ -0,0 +1,44 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+set -xeuo pipefail # Exit immediately if a command exits with a non-zero status
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..)
+
+cd ${PROJECT_ROOT}
+
+# run_test [fast] <command...>
+# - "run_test fast <cmd>" = always runs (both fast and full modes)
+# - "run_test <cmd>"      = only runs in full mode; skipped when FAST=1
+run_test() {
+    if [[ "$1" == "fast" ]]; then
+        shift
+        time "$@"
+    elif [[ "${FAST:-0}" == "1" ]]; then
+        echo "FAST: Skipping: $*"
+    else
+        time "$@"
+    fi
+}
+
+run_test fast uv run --no-sync bash ./tests/functional/audio_grpo_megatron.sh
+run_test      uv run --no-sync bash ./tests/functional/grpo_megatron.sh
+run_test      uv run --no-sync bash ./tests/functional/grpo_megatron_mbridge_restore.sh
+run_test fast uv run --no-sync bash ./tests/functional/grpo_megatron_eagle3_online.sh
+run_test      uv run --no-sync bash ./tests/functional/grpo_megatron_generation.sh
+
+cd ${PROJECT_ROOT}/tests
+coverage combine .coverage*
diff --git a/tests/functional/L1_Functional_Tests_Megatron_2.sh b/tests/functional/L1_Functional_Tests_Megatron_2.sh
new file mode 100644
index 0000000000..8884617d53
--- /dev/null
+++ b/tests/functional/L1_Functional_Tests_Megatron_2.sh
@@ -0,0 +1,43 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+set -xeuo pipefail # Exit immediately if a command exits with a non-zero status
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..)
+
+cd ${PROJECT_ROOT}
+
+# run_test [fast] <command...>
+# - "run_test fast <cmd>" = always runs (both fast and full modes)
+# - "run_test <cmd>"      = only runs in full mode; skipped when FAST=1
+run_test() {
+    if [[ "$1" == "fast" ]]; then
+        shift
+        time "$@"
+    elif [[ "${FAST:-0}" == "1" ]]; then
+        echo "FAST: Skipping: $*"
+    else
+        time "$@"
+    fi
+}
+
+run_test fast uv run --no-sync bash ./tests/functional/grpo_megatron_lora.sh
+run_test fast uv run --no-sync bash ./tests/functional/grpo_megatron_lora_async.sh
+run_test fast uv run --no-sync bash ./tests/functional/dpo_megatron_lora.sh
+run_test      uv run --no-sync bash ./tests/functional/sft_megatron_lora.sh
+
+cd ${PROJECT_ROOT}/tests
+coverage combine .coverage*
diff --git a/tests/functional/L1_Functional_Tests_Megatron_3.sh b/tests/functional/L1_Functional_Tests_Megatron_3.sh
new file mode 100644
index 0000000000..341aad7234
--- /dev/null
+++ b/tests/functional/L1_Functional_Tests_Megatron_3.sh
@@ -0,0 +1,43 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+set -xeuo pipefail # Exit immediately if a command exits with a non-zero status
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..)
+
+cd ${PROJECT_ROOT}
+
+# run_test [fast] <command...>
+# - "run_test fast <cmd>" = always runs (both fast and full modes)
+# - "run_test <cmd>"      = only runs in full mode; skipped when FAST=1
+run_test() {
+    if [[ "$1" == "fast" ]]; then
+        shift
+        time "$@"
+    elif [[ "${FAST:-0}" == "1" ]]; then
+        echo "FAST: Skipping: $*"
+    else
+        time "$@"
+    fi
+}
+
+run_test      uv run --no-sync bash ./tests/functional/distillation_megatron.sh
+run_test      uv run --no-sync bash ./tests/functional/qa_distillation_megatron.sh
+run_test      uv run --no-sync bash ./tests/functional/dpo_megatron.sh
+run_test      uv run --no-sync bash ./tests/functional/sft_megatron.sh
+
+cd ${PROJECT_ROOT}/tests
+coverage combine .coverage*
diff --git a/tests/functional/L1_Functional_Tests_Other_1.sh b/tests/functional/L1_Functional_Tests_Other_1.sh
new file mode 100644
index 0000000000..7cb7f33f61
--- /dev/null
+++ b/tests/functional/L1_Functional_Tests_Other_1.sh
@@ -0,0 +1,55 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+set -xeuo pipefail # Exit immediately if a command exits with a non-zero status
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..)
+
+cd ${PROJECT_ROOT}
+
+# run_test [fast] <command...>
+# - "run_test fast <cmd>" = always runs (both fast and full modes)
+# - "run_test <cmd>"      = only runs in full mode; skipped when FAST=1
+run_test() {
+    if [[ "$1" == "fast" ]]; then
+        shift
+        time "$@"
+    elif [[ "${FAST:-0}" == "1" ]]; then
+        echo "FAST: Skipping: $*"
+    else
+        time "$@"
+    fi
+}
+
+# This test is intentionally not run with uv run --no-sync to verify that the frozen environment is working correctly.
+run_test      bash ./tests/functional/test_frozen_env.sh
+
+run_test fast uv run --no-sync bash ./tests/functional/test_converters.sh
+run_test      uv run --no-sync bash ./tests/functional/test_decode_vs_prefill.sh
+run_test      uv run --no-sync bash ./tests/functional/test_mcore_extra_installed_correctly.sh
+
+# Research functional tests (self-discovery)
+if [[ "${FAST:-0}" != "1" ]]; then
+    for test_script in research/*/tests/functional/*.sh; do
+        project_dir=$(echo $test_script | cut -d/ -f1-2)
+        pushd $project_dir
+        time uv run --no-sync bash $(echo $test_script | cut -d/ -f3-)
+        popd
+    done
+fi
+
+cd ${PROJECT_ROOT}/tests
+coverage combine .coverage*
diff --git a/tests/functional/L1_Functional_Tests_Other_2.sh b/tests/functional/L1_Functional_Tests_Other_2.sh
new file mode 100644
index 0000000000..7c18df6865
--- /dev/null
+++ b/tests/functional/L1_Functional_Tests_Other_2.sh
@@ -0,0 +1,43 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+set -xeuo pipefail # Exit immediately if a command exits with a non-zero status
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..)
+
+cd ${PROJECT_ROOT}
+
+# run_test [fast] <command...>
+# - "run_test fast <cmd>" = always runs (both fast and full modes)
+# - "run_test <cmd>"      = only runs in full mode; skipped when FAST=1
+run_test() {
+    if [[ "$1" == "fast" ]]; then
+        shift
+        time "$@"
+    elif [[ "${FAST:-0}" == "1" ]]; then
+        echo "FAST: Skipping: $*"
+    else
+        time "$@"
+    fi
+}
+
+run_test fast uv run --no-sync bash ./tests/functional/distillation.sh
+run_test fast uv run --no-sync bash ./tests/functional/dpo.sh
+run_test      uv run --no-sync bash ./tests/functional/prorlv2.sh
+run_test      uv run --no-sync bash ./tests/functional/rm.sh
+
+cd ${PROJECT_ROOT}/tests
+coverage combine .coverage*
diff --git a/tests/functional/L1_Functional_Tests_SFT.sh b/tests/functional/L1_Functional_Tests_SFT.sh
new file mode 100644
index 0000000000..7b1b952e4b
--- /dev/null
+++ b/tests/functional/L1_Functional_Tests_SFT.sh
@@ -0,0 +1,42 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+set -xeuo pipefail # Exit immediately if a command exits with a non-zero status
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..)
+
+cd ${PROJECT_ROOT}
+
+# run_test [fast] <command...>
+# - "run_test fast <cmd>" = always runs (both fast and full modes)
+# - "run_test <cmd>"      = only runs in full mode; skipped when FAST=1
+run_test() {
+    if [[ "$1" == "fast" ]]; then
+        shift
+        time "$@"
+    elif [[ "${FAST:-0}" == "1" ]]; then
+        echo "FAST: Skipping: $*"
+    else
+        time "$@"
+    fi
+}
+
+run_test fast uv run --no-sync bash ./tests/functional/sft.sh
+run_test      uv run --no-sync bash ./tests/functional/sft_avlm.sh
+run_test      uv run --no-sync bash ./tests/functional/sft_resume_diamond.sh
+
+cd ${PROJECT_ROOT}/tests
+coverage combine .coverage*
diff --git a/tests/functional/L1_Functional_Tests_SGLang.sh b/tests/functional/L1_Functional_Tests_SGLang.sh
new file mode 100644
index 0000000000..c7143e59fa
--- /dev/null
+++ b/tests/functional/L1_Functional_Tests_SGLang.sh
@@ -0,0 +1,40 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+set -xeuo pipefail # Exit immediately if a command exits with a non-zero status
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..)
+
+cd ${PROJECT_ROOT}
+
+# run_test [fast] <command...>
+# - "run_test fast <cmd>" = always runs (both fast and full modes)
+# - "run_test <cmd>"      = only runs in full mode; skipped when FAST=1
+run_test() {
+    if [[ "$1" == "fast" ]]; then
+        shift
+        time "$@"
+    elif [[ "${FAST:-0}" == "1" ]]; then
+        echo "FAST: Skipping: $*"
+    else
+        time "$@"
+    fi
+}
+
+run_test      uv run --no-sync bash ./tests/functional/grpo_sglang.sh
+
+cd ${PROJECT_ROOT}/tests
+coverage combine .coverage*
diff --git a/tests/functional/test_converters.sh b/tests/functional/test_converters.sh
index ef789ecf90..1306414b17 100644
--- a/tests/functional/test_converters.sh
+++ b/tests/functional/test_converters.sh
@@ -1 +1,9 @@
-uv run --extra mcore tests/functional/test_converter_roundtrip.py
\ No newline at end of file
+#!/bin/bash
+set -euo pipefail
+
+SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
+PROJECT_ROOT=$(realpath "$SCRIPT_DIR/../..")
+
+cd "$PROJECT_ROOT"
+uv run --extra mcore coverage run -a --data-file="$PROJECT_ROOT/tests/.coverage" --source="$PROJECT_ROOT/nemo_rl" \
+    tests/functional/test_converter_roundtrip.py
diff --git a/tests/functional/test_decode_vs_prefill.sh b/tests/functional/test_decode_vs_prefill.sh
index 23d05307ae..ba44872159 100644
--- a/tests/functional/test_decode_vs_prefill.sh
+++ b/tests/functional/test_decode_vs_prefill.sh
@@ -1,4 +1,12 @@
-uv run --extra vllm python tools/model_diagnostics/2.long_generation_decode_vs_prefill.py \
+#!/bin/bash
+set -euo pipefail
+
+SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
+PROJECT_ROOT=$(realpath "$SCRIPT_DIR/../..")
+
+cd "$PROJECT_ROOT"
+uv run --extra vllm coverage run -a --data-file="$PROJECT_ROOT/tests/.coverage" --source="$PROJECT_ROOT/nemo_rl" \
+    tools/model_diagnostics/2.long_generation_decode_vs_prefill.py \
     --model nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16 \
     --prompts arc \
     --max-tokens 8192 \
diff --git a/tests/run_unit.sh b/tests/run_unit.sh
index 0366d6864b..336189e156 100755
--- a/tests/run_unit.sh
+++ b/tests/run_unit.sh
@@ -40,7 +40,13 @@ else
     pytest_args="$@"
 fi
 
-if ! pytest $pytest_args; then
+set +e
+pytest $pytest_args
+exit_code=$?
+set -e
+if [[ $exit_code -eq 5 ]]; then
+    echo "No tests collected; skipping."
+elif [[ $exit_code -ne 0 ]]; then
     echo "[ERROR]: Unit tests failed."
     exit 1
 fi
diff --git a/tests/unit/L0_Unit_Tests_Algorithms.sh b/tests/unit/L0_Unit_Tests_Algorithms.sh
new file mode 100644
index 0000000000..137c242531
--- /dev/null
+++ b/tests/unit/L0_Unit_Tests_Algorithms.sh
@@ -0,0 +1,22 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+# Shard: Algorithm tests not covered by mcore/automodel shards
+# mcore-marked tests (e.g., test_sequence_packing_gradients) are picked up
+# by L0_Unit_Tests_Mcore shard via conftest.py filtering.
+
+source "$(dirname "${BASH_SOURCE[0]}")/run_unit_shard_common.sh"
+
+uv run --no-sync bash -x ./tests/run_unit.sh "unit/algorithms/" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated
diff --git a/tests/unit/L0_Unit_Tests_Automodel.sh b/tests/unit/L0_Unit_Tests_Automodel.sh
new file mode 100644
index 0000000000..1770127ce3
--- /dev/null
+++ b/tests/unit/L0_Unit_Tests_Automodel.sh
@@ -0,0 +1,21 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+# Shard: All automodel-marked tests except policy worker tests
+# Policy worker automodel tests run in L0_Unit_Tests_Automodel_Policy
+
+source "$(dirname "${BASH_SOURCE[0]}")/run_unit_shard_common.sh"
+
+uv run --extra automodel bash -x ./tests/run_unit.sh "unit/" "--ignore=unit/models/policy/" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated --automodel-only
diff --git a/tests/unit/L0_Unit_Tests_Automodel_Policy_1.sh b/tests/unit/L0_Unit_Tests_Automodel_Policy_1.sh
new file mode 100644
index 0000000000..5e4f4b29de
--- /dev/null
+++ b/tests/unit/L0_Unit_Tests_Automodel_Policy_1.sh
@@ -0,0 +1,20 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+# Shard: automodel-marked policy worker tests (test_dtensor_worker*.py, test_automodel_types.py)
+
+source "$(dirname "${BASH_SOURCE[0]}")/run_unit_shard_common.sh"
+
+uv run --extra automodel bash -x ./tests/run_unit.sh "unit/models/policy/" "${EXCLUDED_UNIT_TESTS[@]}" --shard-id=0 --num-shards=3 --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated --automodel-only
diff --git a/tests/unit/L0_Unit_Tests_Automodel_Policy_2.sh b/tests/unit/L0_Unit_Tests_Automodel_Policy_2.sh
new file mode 100644
index 0000000000..9cb575b08c
--- /dev/null
+++ b/tests/unit/L0_Unit_Tests_Automodel_Policy_2.sh
@@ -0,0 +1,20 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+# Shard: automodel-marked policy worker tests (test_dtensor_worker*.py, test_automodel_types.py)
+
+source "$(dirname "${BASH_SOURCE[0]}")/run_unit_shard_common.sh"
+
+uv run --extra automodel bash -x ./tests/run_unit.sh "unit/models/policy/" "${EXCLUDED_UNIT_TESTS[@]}" --shard-id=1 --num-shards=3 --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated --automodel-only
diff --git a/tests/unit/L0_Unit_Tests_Automodel_Policy_3.sh b/tests/unit/L0_Unit_Tests_Automodel_Policy_3.sh
new file mode 100644
index 0000000000..9e3f43aec3
--- /dev/null
+++ b/tests/unit/L0_Unit_Tests_Automodel_Policy_3.sh
@@ -0,0 +1,20 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+# Shard: automodel-marked policy worker tests (test_dtensor_worker*.py, test_automodel_types.py)
+
+source "$(dirname "${BASH_SOURCE[0]}")/run_unit_shard_common.sh"
+
+uv run --extra automodel bash -x ./tests/run_unit.sh "unit/models/policy/" "${EXCLUDED_UNIT_TESTS[@]}" --shard-id=2 --num-shards=3 --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated --automodel-only
diff --git a/tests/unit/L0_Unit_Tests_Data.sh b/tests/unit/L0_Unit_Tests_Data.sh
new file mode 100644
index 0000000000..9ed0423c2e
--- /dev/null
+++ b/tests/unit/L0_Unit_Tests_Data.sh
@@ -0,0 +1,20 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+# Shard: Data pipeline tests (datasets, data processing, message utils)
+
+source "$(dirname "${BASH_SOURCE[0]}")/run_unit_shard_common.sh"
+
+uv run --no-sync bash -x ./tests/run_unit.sh "unit/data/" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated
diff --git a/tests/unit/L0_Unit_Tests_Distributed.sh b/tests/unit/L0_Unit_Tests_Distributed.sh
new file mode 100644
index 0000000000..ad33c14648
--- /dev/null
+++ b/tests/unit/L0_Unit_Tests_Distributed.sh
@@ -0,0 +1,20 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+# Shard: Distributed infrastructure tests (worker groups, virtual cluster, logprob, model utils)
+
+source "$(dirname "${BASH_SOURCE[0]}")/run_unit_shard_common.sh"
+
+uv run --no-sync bash -x ./tests/run_unit.sh "unit/distributed/" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated
diff --git a/tests/unit/L0_Unit_Tests_Environments.sh b/tests/unit/L0_Unit_Tests_Environments.sh
new file mode 100644
index 0000000000..88e032bf99
--- /dev/null
+++ b/tests/unit/L0_Unit_Tests_Environments.sh
@@ -0,0 +1,21 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+# Shard: Environment tests (base only, not nemo_gym-marked)
+# nemo_gym-marked tests are picked up by L0_Unit_Tests_Nemo_Gym shard.
+
+source "$(dirname "${BASH_SOURCE[0]}")/run_unit_shard_common.sh"
+
+uv run --no-sync bash -x ./tests/run_unit.sh "unit/environments/" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated
diff --git a/tests/unit/L0_Unit_Tests_Generation.sh b/tests/unit/L0_Unit_Tests_Generation.sh
deleted file mode 100644
index c9a974afb8..0000000000
--- a/tests/unit/L0_Unit_Tests_Generation.sh
+++ /dev/null
@@ -1,66 +0,0 @@
-# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/bin/bash
-set -xeuo pipefail # Exit immediately if a command exits with a non-zero status
-
-SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..)
-
-cd ${PROJECT_ROOT}
-
-# Source exclusion list for FAST mode
-EXCLUDED_UNIT_TESTS=()
-if [[ "${FAST:-0}" == "1" ]]; then
-    source ${SCRIPT_DIR}/excluded_unit_tests.sh
-fi
-
-uv run tests/unit/prepare_unit_test_assets.py
-
-TEST_PATHS=("unit/models/generation/")
-IGNORE=()
-
-uv run --no-sync bash -x ./tests/run_unit.sh "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated
-
-# Check and run mcore tests
-exit_code=$(cd ${PROJECT_ROOT}/tests && uv run --extra mcore pytest "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --collect-only --hf-gated --mcore-only -q >/dev/null 2>&1; echo $?)
-if [[ $exit_code -eq 5 ]]; then
-    echo "No mcore tests to run"
-else
-    uv run --extra mcore bash -x ./tests/run_unit.sh "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --mcore-only
-fi
-
-# Check and run automodel tests
-exit_code=$(cd ${PROJECT_ROOT}/tests && uv run --extra automodel pytest "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --collect-only --hf-gated --automodel-only -q >/dev/null 2>&1; echo $?)
-if [[ $exit_code -eq 5 ]]; then
-    echo "No automodel tests to run"
-else
-    uv run --extra automodel bash -x ./tests/run_unit.sh "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --automodel-only
-fi
-
-# Check and run vllm tests
-exit_code=$(cd ${PROJECT_ROOT}/tests && uv run --extra vllm pytest "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --collect-only --hf-gated --vllm-only -q >/dev/null 2>&1; echo $?)
-if [[ $exit_code -eq 5 ]]; then
-    echo "No vllm tests to run"
-else
-    uv run --extra vllm bash -x ./tests/run_unit.sh "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --vllm-only
-fi
-
-# Check and run sglang tests
-exit_code=$(cd ${PROJECT_ROOT}/tests && uv run --extra sglang pytest "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --collect-only --hf-gated --sglang-only -q >/dev/null 2>&1; echo $?)
-if [[ $exit_code -eq 5 ]]; then
-    echo "No sglang tests to run"
-else
-    uv run --extra sglang bash -x ./tests/run_unit.sh "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --sglang-only
-fi
diff --git a/tests/unit/L0_Unit_Tests_Mcore.sh b/tests/unit/L0_Unit_Tests_Mcore.sh
new file mode 100644
index 0000000000..19dcf39345
--- /dev/null
+++ b/tests/unit/L0_Unit_Tests_Mcore.sh
@@ -0,0 +1,21 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+# Shard: All mcore-marked tests except policy worker tests
+# Policy worker mcore tests run in L0_Unit_Tests_Mcore_Policy
+
+source "$(dirname "${BASH_SOURCE[0]}")/run_unit_shard_common.sh"
+
+uv run --extra mcore bash -x ./tests/run_unit.sh "unit/" "--ignore=unit/models/policy/" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated --mcore-only
diff --git a/tests/unit/L0_Unit_Tests_Mcore_Policy_1.sh b/tests/unit/L0_Unit_Tests_Mcore_Policy_1.sh
new file mode 100644
index 0000000000..fd4fc76bc8
--- /dev/null
+++ b/tests/unit/L0_Unit_Tests_Mcore_Policy_1.sh
@@ -0,0 +1,20 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+# Shard: mcore-marked policy worker tests (test_megatron_worker.py)
+
+source "$(dirname "${BASH_SOURCE[0]}")/run_unit_shard_common.sh"
+
+uv run --extra mcore bash -x ./tests/run_unit.sh "unit/models/policy/" "${EXCLUDED_UNIT_TESTS[@]}" --shard-id=0 --num-shards=3 --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated --mcore-only
diff --git a/tests/unit/L0_Unit_Tests_Mcore_Policy_2.sh b/tests/unit/L0_Unit_Tests_Mcore_Policy_2.sh
new file mode 100644
index 0000000000..864cbde8fe
--- /dev/null
+++ b/tests/unit/L0_Unit_Tests_Mcore_Policy_2.sh
@@ -0,0 +1,20 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+# Shard: mcore-marked policy worker tests (test_megatron_worker.py)
+
+source "$(dirname "${BASH_SOURCE[0]}")/run_unit_shard_common.sh"
+
+uv run --extra mcore bash -x ./tests/run_unit.sh "unit/models/policy/" "${EXCLUDED_UNIT_TESTS[@]}" --shard-id=1 --num-shards=3 --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated --mcore-only
diff --git a/tests/unit/L0_Unit_Tests_Mcore_Policy_3.sh b/tests/unit/L0_Unit_Tests_Mcore_Policy_3.sh
new file mode 100644
index 0000000000..04a629ffb6
--- /dev/null
+++ b/tests/unit/L0_Unit_Tests_Mcore_Policy_3.sh
@@ -0,0 +1,20 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+# Shard: mcore-marked policy worker tests (test_megatron_worker.py)
+
+source "$(dirname "${BASH_SOURCE[0]}")/run_unit_shard_common.sh"
+
+uv run --extra mcore bash -x ./tests/run_unit.sh "unit/models/policy/" "${EXCLUDED_UNIT_TESTS[@]}" --shard-id=2 --num-shards=3 --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated --mcore-only
diff --git a/tests/unit/L0_Unit_Tests_Models_1.sh b/tests/unit/L0_Unit_Tests_Models_1.sh
new file mode 100644
index 0000000000..75c8109626
--- /dev/null
+++ b/tests/unit/L0_Unit_Tests_Models_1.sh
@@ -0,0 +1,23 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+# Shard: Model tests not covered by mcore/automodel/generation shards
+# Picks up base (unmarked) tests from models/policy/, models/dtensor/, models/huggingface/
+# Tests in models/megatron/ (all mcore) and models/automodel/ (all automodel) are excluded
+# by conftest.py filtering since this is a base run.
+
+source "$(dirname "${BASH_SOURCE[0]}")/run_unit_shard_common.sh"
+
+uv run --no-sync bash -x ./tests/run_unit.sh "unit/models/" "--ignore=unit/models/generation/" "${EXCLUDED_UNIT_TESTS[@]}" --shard-id=0 --num-shards=4 --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated
diff --git a/tests/unit/L0_Unit_Tests_Models_2.sh b/tests/unit/L0_Unit_Tests_Models_2.sh
new file mode 100644
index 0000000000..b8d7253896
--- /dev/null
+++ b/tests/unit/L0_Unit_Tests_Models_2.sh
@@ -0,0 +1,23 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+# Shard: Model tests not covered by mcore/automodel/generation shards
+# Picks up base (unmarked) tests from models/policy/, models/dtensor/, models/huggingface/
+# Tests in models/megatron/ (all mcore) and models/automodel/ (all automodel) are excluded
+# by conftest.py filtering since this is a base run.
+
+source "$(dirname "${BASH_SOURCE[0]}")/run_unit_shard_common.sh"
+
+uv run --no-sync bash -x ./tests/run_unit.sh "unit/models/" "--ignore=unit/models/generation/" "${EXCLUDED_UNIT_TESTS[@]}" --shard-id=1 --num-shards=4 --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated
diff --git a/tests/unit/L0_Unit_Tests_Models_3.sh b/tests/unit/L0_Unit_Tests_Models_3.sh
new file mode 100644
index 0000000000..984c5c5b62
--- /dev/null
+++ b/tests/unit/L0_Unit_Tests_Models_3.sh
@@ -0,0 +1,23 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+# Shard: Model tests not covered by mcore/automodel/generation shards
+# Picks up base (unmarked) tests from models/policy/, models/dtensor/, models/huggingface/
+# Tests in models/megatron/ (all mcore) and models/automodel/ (all automodel) are excluded
+# by conftest.py filtering since this is a base run.
+
+source "$(dirname "${BASH_SOURCE[0]}")/run_unit_shard_common.sh"
+
+uv run --no-sync bash -x ./tests/run_unit.sh "unit/models/" "--ignore=unit/models/generation/" "${EXCLUDED_UNIT_TESTS[@]}" --shard-id=2 --num-shards=4 --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated
diff --git a/tests/unit/L0_Unit_Tests_Models_4.sh b/tests/unit/L0_Unit_Tests_Models_4.sh
new file mode 100644
index 0000000000..84ea65b0ea
--- /dev/null
+++ b/tests/unit/L0_Unit_Tests_Models_4.sh
@@ -0,0 +1,23 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+# Shard: Model tests not covered by mcore/automodel/generation shards
+# Picks up base (unmarked) tests from models/policy/, models/dtensor/, models/huggingface/
+# Tests in models/megatron/ (all mcore) and models/automodel/ (all automodel) are excluded
+# by conftest.py filtering since this is a base run.
+
+source "$(dirname "${BASH_SOURCE[0]}")/run_unit_shard_common.sh"
+
+uv run --no-sync bash -x ./tests/run_unit.sh "unit/models/" "--ignore=unit/models/generation/" "${EXCLUDED_UNIT_TESTS[@]}" --shard-id=3 --num-shards=4 --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated
diff --git a/tests/unit/L0_Unit_Tests_Nemo_Gym.sh b/tests/unit/L0_Unit_Tests_Nemo_Gym.sh
new file mode 100644
index 0000000000..288291ffb4
--- /dev/null
+++ b/tests/unit/L0_Unit_Tests_Nemo_Gym.sh
@@ -0,0 +1,20 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+# Shard: All nemo_gym-marked tests anywhere in the codebase
+
+source "$(dirname "${BASH_SOURCE[0]}")/run_unit_shard_common.sh"
+
+uv run --extra nemo_gym bash -x ./tests/run_unit.sh "unit/" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-report=term-missing --cov-report=json --nemo-gym-only -vv
diff --git a/tests/unit/L0_Unit_Tests_Other.sh b/tests/unit/L0_Unit_Tests_Other.sh
index fa830aeb0b..424e1ce091 100644
--- a/tests/unit/L0_Unit_Tests_Other.sh
+++ b/tests/unit/L0_Unit_Tests_Other.sh
@@ -13,65 +13,21 @@
 # limitations under the License.
 
 #!/bin/bash
-set -xeuo pipefail # Exit immediately if a command exits with a non-zero status
+# Shard: Catch-all for everything not in other shards
+# Covers: experience (base), utils, tools, evals, rewards, root-level tests
+# Extra-marked tests are picked up by their respective shards (Mcore, Automodel, etc.)
 
-SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..)
+source "$(dirname "${BASH_SOURCE[0]}")/run_unit_shard_common.sh"
 
-cd ${PROJECT_ROOT}
+IGNORE=(
+    "--ignore=unit/models/"
+    "--ignore=unit/environments/"
+    "--ignore=unit/algorithms/"
+    "--ignore=unit/data/"
+    "--ignore=unit/distributed/"
+)
 
-# Source exclusion list for FAST mode
-EXCLUDED_UNIT_TESTS=()
-if [[ "${FAST:-0}" == "1" ]]; then
-    source ${SCRIPT_DIR}/excluded_unit_tests.sh
-fi
-
-uv run tests/unit/prepare_unit_test_assets.py
-
-TEST_PATHS=("unit/")
-IGNORE=("--ignore=unit/models/generation/" "--ignore=unit/models/policy/")
-
-uv run --no-sync bash -x ./tests/run_unit.sh "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated
-
-# Check and run mcore tests
-exit_code=$(cd ${PROJECT_ROOT}/tests && uv run --extra mcore pytest "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --collect-only --hf-gated --mcore-only -q >/dev/null 2>&1; echo $?)
-if [[ $exit_code -eq 5 ]]; then
-    echo "No mcore tests to run"
-else
-    uv run --extra mcore bash -x ./tests/run_unit.sh "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --mcore-only
-fi
-
-# Check and run automodel tests
-exit_code=$(cd ${PROJECT_ROOT}/tests && uv run --extra automodel pytest "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --collect-only --hf-gated --automodel-only -q >/dev/null 2>&1; echo $?)
-if [[ $exit_code -eq 5 ]]; then
-    echo "No automodel tests to run"
-else
-    uv run --extra automodel bash -x ./tests/run_unit.sh "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --automodel-only
-fi
-
-# Check and run vllm tests
-exit_code=$(cd ${PROJECT_ROOT}/tests && uv run --extra vllm pytest "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --collect-only --hf-gated --vllm-only -q >/dev/null 2>&1; echo $?)
-if [[ $exit_code -eq 5 ]]; then
-    echo "No vllm tests to run"
-else
-    uv run --extra vllm bash -x ./tests/run_unit.sh "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --vllm-only
-fi
-
-# Check and run sglang tests
-exit_code=$(cd ${PROJECT_ROOT}/tests && uv run --extra sglang pytest "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --collect-only --hf-gated --sglang-only -q >/dev/null 2>&1; echo $?)
-if [[ $exit_code -eq 5 ]]; then
-    echo "No sglang tests to run"
-else
-    uv run --extra sglang bash -x ./tests/run_unit.sh "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --sglang-only
-fi
-
-# Check and run nemo_gym tests
-exit_code=$(cd ${PROJECT_ROOT}/tests && uv run --extra nemo_gym pytest "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --collect-only --nemo-gym-only -q >/dev/null 2>&1; echo $?)
-if [[ $exit_code -eq 5 ]]; then
-    echo "No nemo_gym tests to run"
-else
-    uv run --extra nemo_gym bash -x ./tests/run_unit.sh "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --nemo-gym-only -vv
-fi
+uv run --no-sync bash -x ./tests/run_unit.sh "unit/" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated
 
 # Skip research tests in fast mode
 if [[ "${FAST:-0}" != "1" ]]; then
diff --git a/tests/unit/L0_Unit_Tests_Policy.sh b/tests/unit/L0_Unit_Tests_Policy.sh
deleted file mode 100644
index f19691c421..0000000000
--- a/tests/unit/L0_Unit_Tests_Policy.sh
+++ /dev/null
@@ -1,66 +0,0 @@
-# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/bin/bash
-set -xeuo pipefail # Exit immediately if a command exits with a non-zero status
-
-SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..)
-
-cd ${PROJECT_ROOT}
-
-# Source exclusion list for FAST mode
-EXCLUDED_UNIT_TESTS=()
-if [[ "${FAST:-0}" == "1" ]]; then
-    source ${SCRIPT_DIR}/excluded_unit_tests.sh
-fi
-
-uv run tests/unit/prepare_unit_test_assets.py
-
-TEST_PATHS=("unit/models/policy/")
-IGNORE=()
-
-uv run --no-sync bash -x ./tests/run_unit.sh "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated
-
-# Check and run mcore tests
-exit_code=$(cd ${PROJECT_ROOT}/tests && uv run --extra mcore pytest "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --collect-only --hf-gated --mcore-only -q >/dev/null 2>&1; echo $?)
-if [[ $exit_code -eq 5 ]]; then
-    echo "No mcore tests to run"
-else
-    uv run --extra mcore bash -x ./tests/run_unit.sh "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --mcore-only
-fi
-
-# Check and run automodel tests
-exit_code=$(cd ${PROJECT_ROOT}/tests && uv run --extra automodel pytest "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --collect-only --hf-gated --automodel-only -q >/dev/null 2>&1; echo $?)
-if [[ $exit_code -eq 5 ]]; then
-    echo "No automodel tests to run"
-else
-    uv run --extra automodel bash -x ./tests/run_unit.sh "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --automodel-only
-fi
-
-# Check and run vllm tests
-exit_code=$(cd ${PROJECT_ROOT}/tests && uv run --extra vllm pytest "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --collect-only --hf-gated --vllm-only -q >/dev/null 2>&1; echo $?)
-if [[ $exit_code -eq 5 ]]; then
-    echo "No vllm tests to run"
-else
-    uv run --extra vllm bash -x ./tests/run_unit.sh "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --vllm-only
-fi
-
-# Check and run sglang tests
-exit_code=$(cd ${PROJECT_ROOT}/tests && uv run --extra sglang pytest "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --collect-only --hf-gated --sglang-only -q >/dev/null 2>&1; echo $?)
-if [[ $exit_code -eq 5 ]]; then
-    echo "No sglang tests to run"
-else
-    uv run --extra sglang bash -x ./tests/run_unit.sh "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --sglang-only
-fi
diff --git a/tests/unit/L0_Unit_Tests_Sglang.sh b/tests/unit/L0_Unit_Tests_Sglang.sh
new file mode 100644
index 0000000000..5bf60a092e
--- /dev/null
+++ b/tests/unit/L0_Unit_Tests_Sglang.sh
@@ -0,0 +1,29 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+# Shard: All SGLang tests (base sglang files + sglang-marked tests anywhere)
+
+source "$(dirname "${BASH_SOURCE[0]}")/run_unit_shard_common.sh"
+
+SGLANG_PATHS=(
+    "unit/models/generation/test_sglang_generation.py"
+    "unit/models/generation/test_sglang_utils.py"
+)
+
+# Base run on sglang files (picks up unmarked tests)
+uv run --no-sync bash -x ./tests/run_unit.sh "${SGLANG_PATHS[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated
+
+# sglang-only across all unit tests (catch-all)
+uv run --extra sglang bash -x ./tests/run_unit.sh "unit/" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --sglang-only
diff --git a/tests/unit/L0_Unit_Tests_Vllm_1.sh b/tests/unit/L0_Unit_Tests_Vllm_1.sh
new file mode 100644
index 0000000000..08e4e7acda
--- /dev/null
+++ b/tests/unit/L0_Unit_Tests_Vllm_1.sh
@@ -0,0 +1,24 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+# Shard: vLLM generation tests (base + vllm-marked)
+
+source "$(dirname "${BASH_SOURCE[0]}")/run_unit_shard_common.sh"
+
+# Base run (tests without extra markers)
+uv run --no-sync bash -x ./tests/run_unit.sh "unit/models/generation/test_vllm*.py" "${EXCLUDED_UNIT_TESTS[@]}" --shard-id=0 --num-shards=3 --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated
+
+# vllm-only run (catch-all across all unit tests)
+uv run --extra vllm bash -x ./tests/run_unit.sh "unit/" "${EXCLUDED_UNIT_TESTS[@]}" --shard-id=0 --num-shards=3 --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --vllm-only
diff --git a/tests/unit/L0_Unit_Tests_Vllm_2.sh b/tests/unit/L0_Unit_Tests_Vllm_2.sh
new file mode 100644
index 0000000000..39f6a2a287
--- /dev/null
+++ b/tests/unit/L0_Unit_Tests_Vllm_2.sh
@@ -0,0 +1,24 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+# Shard: vLLM generation tests (base + vllm-marked)
+
+source "$(dirname "${BASH_SOURCE[0]}")/run_unit_shard_common.sh"
+
+# Base run (tests without extra markers)
+uv run --no-sync bash -x ./tests/run_unit.sh "unit/models/generation/test_vllm*.py" "${EXCLUDED_UNIT_TESTS[@]}" --shard-id=1 --num-shards=3 --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated
+
+# vllm-only run (catch-all across all unit tests)
+uv run --extra vllm bash -x ./tests/run_unit.sh "unit/" "${EXCLUDED_UNIT_TESTS[@]}" --shard-id=1 --num-shards=3 --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --vllm-only
diff --git a/tests/unit/L0_Unit_Tests_Vllm_3.sh b/tests/unit/L0_Unit_Tests_Vllm_3.sh
new file mode 100644
index 0000000000..bdeac8a678
--- /dev/null
+++ b/tests/unit/L0_Unit_Tests_Vllm_3.sh
@@ -0,0 +1,24 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+# Shard: vLLM generation tests (base + vllm-marked)
+
+source "$(dirname "${BASH_SOURCE[0]}")/run_unit_shard_common.sh"
+
+# Base run (tests without extra markers)
+uv run --no-sync bash -x ./tests/run_unit.sh "unit/models/generation/test_vllm*.py" "${EXCLUDED_UNIT_TESTS[@]}" --shard-id=2 --num-shards=3 --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated
+
+# vllm-only run (catch-all across all unit tests)
+uv run --extra vllm bash -x ./tests/run_unit.sh "unit/" "${EXCLUDED_UNIT_TESTS[@]}" --shard-id=2 --num-shards=3 --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --vllm-only
diff --git a/tests/unit/data/__init__.py b/tests/unit/data/__init__.py
new file mode 100644
index 0000000000..4fc25d0d3c
--- /dev/null
+++ b/tests/unit/data/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tests/unit/experience/test_rollouts.py b/tests/unit/experience/test_rollouts.py
index e60253c50a..3b4ef1dede 100644
--- a/tests/unit/experience/test_rollouts.py
+++ b/tests/unit/experience/test_rollouts.py
@@ -946,7 +946,13 @@ def _standardize(d: dict) -> dict:
         final_batch["total_reward"] = final_batch["total_reward"].tolist()
         final_batch["loss_multiplier"] = final_batch["loss_multiplier"].tolist()
         final_batch["length"] = final_batch["length"].tolist()
-        final_batch["truncated"] = final_batch["truncated"].tolist()
+        # truncated depends on exact generation output which is not reproducible,
+        # so just verify each value is a bool rather than checking exact values
+        if "truncated" in final_batch:
+            assert all(
+                isinstance(v, (bool, int)) for v in final_batch["truncated"].tolist()
+            )
+            final_batch.pop("truncated")
 
         for key in d["rollout_metrics"]:
             # We remove these fields from comparison since we cannot guarantee exact generation reproducibility
diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py
index c8d1a6c156..1b0b06cdb6 100644
--- a/tests/unit/models/generation/test_vllm_generation.py
+++ b/tests/unit/models/generation/test_vllm_generation.py
@@ -146,6 +146,17 @@
 }
 
 
+def skip_fp8_known_failures() -> None:
+    device_name = torch.cuda.get_device_name()
+    if any(gpu_name in device_name for gpu_name in ("H100", "GB200")):
+        # TODO(https://github.com/NVIDIA-NeMo/RL/issues/2081): Re-enable these
+        # FP8 vLLM tests once the known H100/GB200 failures are fixed.
+        pytest.skip(
+            f"Skipping FP8 vLLM test on {device_name} due to a known failure. "
+            "See https://github.com/NVIDIA-NeMo/RL/issues/2081"
+        )
+
+
 @pytest.mark.parametrize(
     "colocated,async_engine,expected_method,expected_kwargs",
     [
@@ -981,37 +992,37 @@ async def run_hf_train_process(
             lm_policy.shutdown()
 
 
-@pytest.mark.timeout(420)
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
     ("async_engine", "cpu_offload", "vllm_precision", "enable_lora"),
     [
-        (True, False, "bfloat16", False),
-        (False, True, "bfloat16", False),
-        (True, False, "fp8", False),
-        (False, True, "fp8", False),
-        # LoRA tests (requires dtensor v2 / automodel)
-        pytest.param(False, False, "bfloat16", True, marks=pytest.mark.automodel),
-        pytest.param(True, False, "bfloat16", True, marks=pytest.mark.automodel),
+        pytest.param(True, False, "bfloat16", False, marks=pytest.mark.timeout(900)),
+        pytest.param(False, True, "bfloat16", False, marks=pytest.mark.timeout(900)),
+        pytest.param(True, False, "fp8", False, marks=pytest.mark.timeout(900)),
+        pytest.param(False, True, "fp8", False, marks=pytest.mark.timeout(900)),
+        # LoRA tests require dtensor v2 / automodel and take longer in CI.
+        pytest.param(
+            False,
+            False,
+            "bfloat16",
+            True,
+            marks=[pytest.mark.automodel, pytest.mark.timeout(900)],
+        ),
+        pytest.param(
+            True,
+            False,
+            "bfloat16",
+            True,
+            marks=[pytest.mark.automodel, pytest.mark.timeout(900)],
+        ),
     ],
 )
 async def test_vllm_generation_with_hf_training_colocated(
     cluster, tokenizer, async_engine, cpu_offload, vllm_precision, enable_lora
 ):
     """This test validates that DTensor policy can work together with colocated vLLM policy."""
-    device_name = torch.cuda.get_device_name(0)
-    if vllm_precision == "fp8" and "GB200" in device_name:
-        pytest.skip(
-            "Skipping FP8 test on GB200 until fixed. See https://github.com/NVIDIA-NeMo/RL/issues/2081"
-        )
-
-    # Skip the fp8 tests if the GPU is not H100 or newer (compute capability < 9.0)
     if vllm_precision == "fp8":
-        major_capability, _ = torch.cuda.get_device_capability()
-        if major_capability < 9:
-            pytest.skip(
-                f"Skipping FP8 test. GPU compute capability {major_capability}.0 is < 9.0 (H100 required)."
-            )
+        skip_fp8_known_failures()
 
     # Create VllmGeneration Policy
     print("Creating vLLM policy...")
@@ -1052,20 +1063,31 @@ async def test_vllm_generation_with_hf_training_colocated(
     )
 
 
-@pytest.mark.timeout(300)
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
     ("async_engine", "cpu_offload", "vllm_precision", "enable_lora"),
     [
-        (True, False, "bfloat16", False),
-        (False, True, "bfloat16", False),
+        pytest.param(True, False, "bfloat16", False, marks=pytest.mark.timeout(900)),
+        pytest.param(False, True, "bfloat16", False, marks=pytest.mark.timeout(900)),
         # NOTE: non-colocated FP8 tests fail on main as of 3/9/2026 with
         # avg_prob_mult_error=1.13 > 1.08 threshold. Left unskipped to match main.
-        (True, False, "fp8", False),
-        (False, True, "fp8", False),
-        # LoRA tests (requires dtensor v2 / automodel)
-        pytest.param(False, False, "bfloat16", True, marks=pytest.mark.automodel),
-        pytest.param(True, False, "bfloat16", True, marks=pytest.mark.automodel),
+        pytest.param(True, False, "fp8", False, marks=pytest.mark.timeout(900)),
+        pytest.param(False, True, "fp8", False, marks=pytest.mark.timeout(900)),
+        # LoRA tests require dtensor v2 / automodel and take longer in CI.
+        pytest.param(
+            False,
+            False,
+            "bfloat16",
+            True,
+            marks=[pytest.mark.automodel, pytest.mark.timeout(900)],
+        ),
+        pytest.param(
+            True,
+            False,
+            "bfloat16",
+            True,
+            marks=[pytest.mark.automodel, pytest.mark.timeout(900)],
+        ),
     ],
 )
 async def test_vllm_generation_with_hf_training_non_colocated(
@@ -1076,19 +1098,8 @@ async def test_vllm_generation_with_hf_training_non_colocated(
     vllm_precision,
     enable_lora,
 ):
-    device_name = torch.cuda.get_device_name(0)
-    if vllm_precision == "fp8" and "GB200" in device_name:
-        pytest.skip(
-            "Skipping FP8 test on GB200 until fixed. See https://github.com/NVIDIA-NeMo/RL/issues/2081"
-        )
-
-    # Skip the fp8 tests if the GPU is not H100 or newer (compute capability < 9.0)
     if vllm_precision == "fp8":
-        major_capability, _ = torch.cuda.get_device_capability()
-        if major_capability < 9:
-            pytest.skip(
-                f"Skipping FP8 test. GPU compute capability {major_capability}.0 is < 9.0 (H100 required)."
-            )
+        skip_fp8_known_failures()
 
     """This test validates that DTensor policy can work together with non-colocated vLLM policy."""
     generation_cluster_separate = get_generation_cluster_separate(1)
@@ -1714,25 +1725,15 @@ async def test_vllm_http_server_correct_merged_tokens_matches_baseline(
     vllm_generation.shutdown()
 
 
-@pytest.mark.timeout(600)
+@pytest.mark.timeout(900)
 @pytest.mark.parametrize("tensor_parallel_size", [1, 2])
 @pytest.mark.parametrize("vllm_precision", ["bfloat16", "fp8"])
 def test_vllm_weight_update_and_prefix_cache_reset(
     cluster, tokenizer, tensor_parallel_size, vllm_precision
 ):
     """Test that the vLLM prefix cache is correctly reset when weights change."""
-    device_name = torch.cuda.get_device_name(0)
-    if vllm_precision == "fp8" and "GB200" in device_name:
-        pytest.skip(
-            "Skipping FP8 test on GB200 until fixed. See https://github.com/NVIDIA-NeMo/RL/issues/2081"
-        )
-
     if vllm_precision == "fp8":
-        major_capability, _ = torch.cuda.get_device_capability()
-        if major_capability < 9:
-            pytest.skip(
-                f"Skipping FP8 test. GPU compute capability {major_capability}.0 is < 9.0 (H100 required)."
-            )
+        skip_fp8_known_failures()
 
     from nemo_rl.models.policy.lm_policy import Policy
 
@@ -2130,7 +2131,7 @@ async def test_vllm_refit_non_colocated_update_weights(
 
 
 @pytest.mark.mcore
-@pytest.mark.timeout(360)
+@pytest.mark.timeout(600)
 @pytest.mark.parametrize("tensor_parallel_size", [1, 2])
 @pytest.mark.parametrize("vllm_precision", ["bfloat16", "fp8"])
 @pytest.mark.parametrize("kv_cache_dtype", [None, "fp8"])
@@ -2141,24 +2142,13 @@ def test_vllm_generation_with_megatron_training(
 
     This test validates that vLLM and Megatron policies can work together.
     """
-    device_name = torch.cuda.get_device_name(0)
-    if vllm_precision == "fp8" and "GB200" in device_name:
-        pytest.skip(
-            "Skipping FP8 test on GB200 until fixed. See https://github.com/NVIDIA-NeMo/RL/issues/2081"
-        )
+    if vllm_precision == "fp8":
+        skip_fp8_known_failures()
 
     # Skip invalid configurations: kv_cache_dtype=fp8 requires precision=fp8
     if kv_cache_dtype == "fp8" and vllm_precision != "fp8":
         pytest.skip("kv_cache_dtype='fp8' requires precision='fp8'")
 
-    # Skip the fp8 tests if the GPU is not H100 or newer (compute capability < 9.0)
-    if vllm_precision == "fp8":
-        major_capability, _ = torch.cuda.get_device_capability()
-        if major_capability < 9:
-            pytest.skip(
-                f"Skipping FP8 test. GPU compute capability {major_capability}.0 is < 9.0 (H100 required)."
-            )
-
     if cluster.num_gpus_per_node < tensor_parallel_size:
         pytest.skip(f"Need at least {tensor_parallel_size} GPUs for this test")
 
@@ -2321,19 +2311,8 @@ def test_vllm_generation_with_megatron_training_moe_model(
 
     This test validates that vLLM and Megatron policies can work together.
     """
-    device_name = torch.cuda.get_device_name(0)
-    if vllm_precision == "fp8" and "GB200" in device_name:
-        pytest.skip(
-            "Skipping FP8 test on GB200 until fixed. See https://github.com/NVIDIA-NeMo/RL/issues/2081"
-        )
-
-    # Skip the fp8 tests if the GPU is not H100 or newer (compute capability < 9.0)
     if vllm_precision == "fp8":
-        major_capability, _ = torch.cuda.get_device_capability()
-        if major_capability < 9:
-            pytest.skip(
-                f"Skipping FP8 test. GPU compute capability {major_capability}.0 is < 9.0 (H100 required)."
-            )
+        skip_fp8_known_failures()
 
     model_name = "moonshotai/Moonlight-16B-A3B-Instruct"
     expert_parallel_size = 8
diff --git a/tests/unit/models/policy/__init__.py b/tests/unit/models/policy/__init__.py
new file mode 100644
index 0000000000..4fc25d0d3c
--- /dev/null
+++ b/tests/unit/models/policy/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tests/unit/models/policy/test_dtensor_worker.py b/tests/unit/models/policy/test_dtensor_worker.py
index 4043e3c8a3..a1737de3bd 100644
--- a/tests/unit/models/policy/test_dtensor_worker.py
+++ b/tests/unit/models/policy/test_dtensor_worker.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import pprint
-import time
 
 import pytest
 import ray
@@ -27,6 +26,7 @@
 from nemo_rl.models.generation import configure_generation_config
 from nemo_rl.models.policy import PolicyConfig
 from nemo_rl.models.policy.lm_policy import Policy
+from nemo_rl.utils.flops_tracker import FLOPTracker, get_default_hf_config
 from tests.unit.test_utils import SimpleLossFn
 
 
@@ -1046,7 +1046,7 @@ def test_dtensor_v1_policy_flops_range_check(
     ):
         """Test that the returned FLOPS is within a reasonable range using dtensor backend.
 
-        Performs 2 warmup iterations and measures FLOPS for the next 3 iterations.
+        Performs 2 warmup iterations and checks FLOPS for the next 3 iterations.
         """
         batch_size = 8
         seq_len = 128
@@ -1101,12 +1101,9 @@ def test_dtensor_v1_policy_flops_range_check(
             for warmup_step in range(2):
                 results = policy.train(data, loss_fn)
 
-            # Measure FLOPS on the third iteration
-            print("Measuring FLOPS on 3 iterations...")
-            time_begin = time.time()
+            print("Checking FLOPS on 3 iterations...")
             for train_step in range(3):
                 results = policy.train(data, loss_fn)
-            runtime_sec = time.time() - time_begin
 
             # Check if FLOPS tracking is available
             if policy.flops_tracker is not None:
@@ -1120,14 +1117,19 @@ def test_dtensor_v1_policy_flops_range_check(
                 )
                 assert total_flops > 0, "total_flops should be positive"
 
-                total_tflops = total_flops / 1e12 / 3
-                print(f"Total FLOPS: {total_flops:.2e} ({total_tflops:.4f} TFLOPS)")
+                expected_tracker = FLOPTracker.from_config(
+                    config["model_name"], get_default_hf_config(config["model_name"])
+                )
+                expected_tracker.track_batch(input_lengths.tolist())
+                expected_total_flops = expected_tracker.total_flops
 
-                flop_count_total = total_flops * runtime_sec
-                assert 1e9 < flop_count_total < 5e10, (
-                    "Total FLOPS should be within 1e9 and 5e10"
+                assert total_flops == pytest.approx(expected_total_flops, rel=0.05), (
+                    f"Expected {expected_total_flops:.2e} FLOPS, got {total_flops:.2e}"
                 )
 
+                total_tflops = total_flops / 1e12
+                print(f"Total FLOPS: {total_flops:.2e} ({total_tflops:.4f} TFLOPS)")
+
                 if "theoretical_tflops" in results:
                     theoretical_tflops = results["theoretical_tflops"]
                     assert isinstance(theoretical_tflops, (int, float)), (
diff --git a/tests/unit/run_unit_shard_common.sh b/tests/unit/run_unit_shard_common.sh
new file mode 100644
index 0000000000..3ca50b3f65
--- /dev/null
+++ b/tests/unit/run_unit_shard_common.sh
@@ -0,0 +1,32 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Common boilerplate for unit test shard scripts.
+# Source this file at the top of each L0_Unit_Tests_*.sh shard script.
+# It sets up: SCRIPT_DIR, PROJECT_ROOT, FAST exclusions, and test assets.
+
+set -xeuo pipefail
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..)
+
+cd ${PROJECT_ROOT}
+
+# Source exclusion list for FAST mode
+EXCLUDED_UNIT_TESTS=()
+if [[ "${FAST:-0}" == "1" ]]; then
+    source ${SCRIPT_DIR}/excluded_unit_tests.sh
+fi
+
+uv run tests/unit/prepare_unit_test_assets.py
diff --git a/tests/unit/test_recipes_and_test_suites.py b/tests/unit/test_recipes_and_test_suites.py
index 760ce027b5..f8a5bdc5d1 100644
--- a/tests/unit/test_recipes_and_test_suites.py
+++ b/tests/unit/test_recipes_and_test_suites.py
@@ -326,28 +326,3 @@ def test_all_recipes_start_with_algo_hyphen(all_recipe_yaml_rel_paths):
         assert algo in expected_algos, (
             f"Recipe {recipe_yaml} has unexpected algo {algo}"
         )
-
-
-def test_functional_tests_exist():
-    functional_tests_dir = os.path.join(project_root, "tests", "functional")
-
-    test_list = []
-    with open(
-        os.path.join(functional_tests_dir, "L1_Functional_Tests_GPU.sh"), "r"
-    ) as f:
-        for line in f:
-            line = line.strip()
-            if line and "./tests/functional" in line:
-                test_list.append(line.split(" ")[-1].split("/")[-1])
-
-    missing_list = []
-    for filename in os.listdir(functional_tests_dir):
-        if filename.endswith(".sh"):
-            if filename == "L1_Functional_Tests_GPU.sh":
-                continue
-            if filename not in test_list:
-                missing_list.append(f"./tests/functional/{filename}")
-
-    assert len(missing_list) == 0, (
-        f"Missing functional test scripts in ./tests/functional/L1_Functional_Tests_GPU.sh:\n{'\n'.join(missing_list)}"
-    )
diff --git a/uv.lock b/uv.lock
index 7d182cf205..3cb7b14dff 100644
--- a/uv.lock
+++ b/uv.lock
@@ -4014,6 +4014,7 @@ test = [
     { name = "pytest" },
     { name = "pytest-asyncio" },
     { name = "pytest-cov" },
+    { name = "pytest-shard" },
     { name = "pytest-testmon" },
     { name = "pytest-timeout" },
 ]
@@ -4146,6 +4147,7 @@ test = [
     { name = "pytest", specifier = ">=8.4.2" },
     { name = "pytest-asyncio" },
     { name = "pytest-cov" },
+    { name = "pytest-shard" },
     { name = "pytest-testmon" },
     { name = "pytest-timeout" },
 ]
@@ -5717,6 +5719,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/9d/7a/d968e294073affff457b041c2be9868a40c1c71f4a35fcc1e45e5493067b/pytest_cov-7.1.0-py3-none-any.whl", hash = "sha256:a0461110b7865f9a271aa1b51e516c9a95de9d696734a2f71e3e78f46e1d4678", size = 22876, upload-time = "2026-03-21T20:11:14.438Z" },
 ]
 
+[[package]]
+name = "pytest-shard"
+version = "0.1.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pytest" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c6/ca/3efa6f3b84dab83220db45997e785be726684c2c2c4267bffb7d80101c7f/pytest-shard-0.1.2.tar.gz", hash = "sha256:b86a967fbfd1c8e50295095ccda031b7e890862ee06531d5142844f4c1d1cd67", size = 3579, upload-time = "2020-12-11T19:52:55.083Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/65/7a/dbeb4c54e9fc3b59622f410091365f354a69cda1af10c3b83ac0ca6e6f4f/pytest_shard-0.1.2-py3-none-any.whl", hash = "sha256:407a1df385cebe1feb9b4d2e7eeee8b044f8a24f0919421233159a17c59be2b9", size = 4608, upload-time = "2020-12-11T19:52:54.226Z" },
+]
+
 [[package]]
 name = "pytest-testmon"
 version = "2.2.0"