diff --git a/.github/workflows/sync-gpu-profiles.yml b/.github/workflows/sync-gpu-profiles.yml index c966d093..f98a3708 100644 --- a/.github/workflows/sync-gpu-profiles.yml +++ b/.github/workflows/sync-gpu-profiles.yml @@ -40,7 +40,7 @@ jobs: if: steps.diff.outputs.changed == 'true' id: version run: | - ver=$(head -2 deploy/fake-gpu-operator/templates/profiles/builtin.yaml | grep '# Source:' | sed 's/.*k8s-test-infra //') + ver=$(grep -m1 '^# Source:' deploy/fake-gpu-operator/templates/profiles/builtin.yaml | sed 's/.*k8s-test-infra //') echo "version=$ver" >> "$GITHUB_OUTPUT" - name: Create PR diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b2a8bcc..21e5bb5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,9 +12,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ### Changed +- Built-in GPU profiles re-synced from NVIDIA/k8s-test-infra `main` (commit + `497fa04`): each profile now includes a `pcie_topology` block (PCI root + complexes with per-device `numa_node`), and a `gb300` profile is added. This + is what lets the mock backend report per-GPU NUMA affinity. (RUN-40173) +- `hack/sync-profiles.sh`: default source bumped `v0.1.0` → `main`; now resolves + a tag, branch, or commit SHA (was tags/branches only) and records the resolved + commit in the generated `# Source:` header. (RUN-40173) + ### Fixed - `device-plugin` injects `NODE_NAME` so non-DRA pods can run the fake `nvidia-smi`. ([#191](https://github.com/run-ai/fake-gpu-operator/issues/191)) +- `sync-gpu-profiles` workflow read the synced version with `head -2`, but the `# Source:` line is line 3, so the PR title/commit version was always empty. (RUN-40173) - CI `e2e-upgrade (latest-main)` lane no longer deadlocks resolving its baseline chart. 
It now walks `--first-parent` main commits (only those publish a `0.0.0-` chart, so merges no longer fill the window with unpublished diff --git a/deploy/fake-gpu-operator/templates/profiles/builtin.yaml b/deploy/fake-gpu-operator/templates/profiles/builtin.yaml index 86b59a53..bef22d6b 100644 --- a/deploy/fake-gpu-operator/templates/profiles/builtin.yaml +++ b/deploy/fake-gpu-operator/templates/profiles/builtin.yaml @@ -1,6 +1,6 @@ {{- if (.Values.builtinProfiles).enabled }} # Auto-generated by hack/sync-profiles.sh — do not edit manually. -# Source: NVIDIA/k8s-test-infra v0.1.0 +# Source: NVIDIA/k8s-test-infra main (commit 497fa04) apiVersion: v1 kind: ConfigMap metadata: @@ -326,56 +326,56 @@ data: uuid: "GPU-12345678-1234-1234-1234-123456780000" serial: "1324821083800" pci: - bus_id: "00000000:07:00.0" + bus_id: "0000:07:00.0" minor_number: 0 - index: 1 uuid: "GPU-12345678-1234-1234-1234-123456780001" serial: "1324821083801" pci: - bus_id: "00000000:0F:00.0" + bus_id: "0000:0F:00.0" minor_number: 1 - index: 2 uuid: "GPU-12345678-1234-1234-1234-123456780002" serial: "1324821083802" pci: - bus_id: "00000000:47:00.0" + bus_id: "0000:47:00.0" minor_number: 2 - index: 3 uuid: "GPU-12345678-1234-1234-1234-123456780003" serial: "1324821083803" pci: - bus_id: "00000000:4E:00.0" + bus_id: "0000:4E:00.0" minor_number: 3 - index: 4 uuid: "GPU-12345678-1234-1234-1234-123456780004" serial: "1324821083804" pci: - bus_id: "00000000:87:00.0" + bus_id: "0000:87:00.0" minor_number: 4 - index: 5 uuid: "GPU-12345678-1234-1234-1234-123456780005" serial: "1324821083805" pci: - bus_id: "00000000:90:00.0" + bus_id: "0000:90:00.0" minor_number: 5 - index: 6 uuid: "GPU-12345678-1234-1234-1234-123456780006" serial: "1324821083806" pci: - bus_id: "00000000:B7:00.0" + bus_id: "0000:B7:00.0" minor_number: 6 - index: 7 uuid: "GPU-12345678-1234-1234-1234-123456780007" serial: "1324821083807" pci: - bus_id: "00000000:BD:00.0" + bus_id: "0000:BD:00.0" 
minor_number: 7 # ============================================================================= @@ -390,8 +390,48 @@ data: - link: 0 state: "active" remote_device_type: "gpu" - remote_pci_bus_id: "00000000:0F:00.0" + remote_pci_bus_id: "0000:0F:00.0" # ... define all 12 links for full topology + + # ============================================================================= + # InfiniBand topology - 1 ConnectX-6 HDR HCA per GPU (typical DGX A100) + # ============================================================================= + infiniband: + enabled: true + hca_type: "MT4123" # ConnectX-6 + fw_version: "20.36.1010" + hw_rev: "0x0" + board_id: "MT_0000000223" + link_layer: "InfiniBand" + rate_gbps: 200 # HDR + port_state: "ACTIVE" + phys_state: "LinkUp" + hcas_per_gpu: 1 + guid_prefix: "a288c2:0300:ab" + node_desc_template: "{node_name} mlx5_{idx}" + + # ============================================================================= + # PCIe topology - 2 NUMA nodes (dual EPYC), 4 GPUs each. + # Consumed by `render-pci-sysfs` to materialize a fake /sys/bus/pci tree + # under MOCK_PCI_ROOT; the NVIDIA DRA driver resolves `dra.k8s.io/pcieRoot` + # from these symlinks. 
+ # ============================================================================= + pcie_topology: + root_complexes: + - id: "pci0000:00" + numa_node: 0 + devices: + - "0000:07:00.0" + - "0000:0F:00.0" + - "0000:47:00.0" + - "0000:4E:00.0" + - id: "pci0000:80" + numa_node: 1 + devices: + - "0000:87:00.0" + - "0000:90:00.0" + - "0000:B7:00.0" + - "0000:BD:00.0" --- apiVersion: v1 kind: ConfigMap @@ -725,56 +765,56 @@ data: uuid: "GPU-b2000000-0000-0000-0000-000000000000" serial: "1562849203700" pci: - bus_id: "00000000:1A:00.0" + bus_id: "0000:1A:00.0" minor_number: 0 - index: 1 uuid: "GPU-b2000000-0000-0000-0000-000000000001" serial: "1562849203701" pci: - bus_id: "00000000:1B:00.0" + bus_id: "0000:1B:00.0" minor_number: 1 - index: 2 uuid: "GPU-b2000000-0000-0000-0000-000000000002" serial: "1562849203702" pci: - bus_id: "00000000:4A:00.0" + bus_id: "0000:4A:00.0" minor_number: 2 - index: 3 uuid: "GPU-b2000000-0000-0000-0000-000000000003" serial: "1562849203703" pci: - bus_id: "00000000:4B:00.0" + bus_id: "0000:4B:00.0" minor_number: 3 - index: 4 uuid: "GPU-b2000000-0000-0000-0000-000000000004" serial: "1562849203704" pci: - bus_id: "00000000:8A:00.0" + bus_id: "0000:8A:00.0" minor_number: 4 - index: 5 uuid: "GPU-b2000000-0000-0000-0000-000000000005" serial: "1562849203705" pci: - bus_id: "00000000:8B:00.0" + bus_id: "0000:8B:00.0" minor_number: 5 - index: 6 uuid: "GPU-b2000000-0000-0000-0000-000000000006" serial: "1562849203706" pci: - bus_id: "00000000:CA:00.0" + bus_id: "0000:CA:00.0" minor_number: 6 - index: 7 uuid: "GPU-b2000000-0000-0000-0000-000000000007" serial: "1562849203707" pci: - bus_id: "00000000:CB:00.0" + bus_id: "0000:CB:00.0" minor_number: 7 # ============================================================================= @@ -788,7 +828,47 @@ data: - link: 0 state: "active" remote_device_type: "gpu" - remote_pci_bus_id: "00000000:1B:00.0" + remote_pci_bus_id: "0000:1B:00.0" + + # 
============================================================================= + # InfiniBand topology - 1 ConnectX-7 NDR HCA per GPU (typical HGX B200) + # ============================================================================= + infiniband: + enabled: true + hca_type: "MT4129" + fw_version: "28.40.1000" + hw_rev: "0x0" + board_id: "MT_0000000838" + link_layer: "InfiniBand" + rate_gbps: 400 + port_state: "ACTIVE" + phys_state: "LinkUp" + hcas_per_gpu: 1 + guid_prefix: "b288c2:0300:ab" + node_desc_template: "{node_name} mlx5_{idx}" + + # ============================================================================= + # PCIe topology - B200, 2 NUMA nodes, 4 GPUs each. + # Consumed by `render-pci-sysfs` to materialize a fake /sys/bus/pci tree + # under MOCK_PCI_ROOT; the NVIDIA DRA driver resolves `dra.k8s.io/pcieRoot` + # from these symlinks. + # ============================================================================= + pcie_topology: + root_complexes: + - id: "pci0000:00" + numa_node: 0 + devices: + - "0000:1A:00.0" + - "0000:1B:00.0" + - "0000:4A:00.0" + - "0000:4B:00.0" + - id: "pci0000:80" + numa_node: 1 + devices: + - "0000:8A:00.0" + - "0000:8B:00.0" + - "0000:CA:00.0" + - "0000:CB:00.0" --- apiVersion: v1 kind: ConfigMap @@ -836,6 +916,20 @@ data: minor: 0 num_gpu_cores: 18432 # Blackwell has more CUDA cores + # --------------------------------------------------------------------------- + # NVLink fabric (GB200 ComputeDomain). The clique_id / cluster_uuid below + # are placeholder defaults; the topology-overlay step in the engine + # rewrites them per-node based on the cluster-level topology ConfigMap + # (set Helm value `topology.enabled=true`). Nodes that don't appear in + # the topology fall through to these defaults, which is sufficient for + # single-node experiments. See pkg/gpu/mocknvml/engine/fabric.go. 
+ # --------------------------------------------------------------------------- + fabric: + cluster_uuid: "00000000-0000-0000-0000-000000000001" + clique_id: 0 + state: "completed" + health_mask: 0 + # --------------------------------------------------------------------------- # InfoROM versions # --------------------------------------------------------------------------- @@ -1131,66 +1225,66 @@ data: # ============================================================================= devices: - index: 0 - uuid: "GPU-9b200000-0000-0000-0000-000000000000" + uuid: "GPU-b200b200-0000-0000-0000-000000000000" serial: "1562849103700" pci: - bus_id: "00000000:0A:00.0" + bus_id: "0000:0A:00.0" minor_number: 0 grace_cpu_pair: 0 # Paired with Grace CPU 0 - index: 1 - uuid: "GPU-9b200000-0000-0000-0000-000000000001" + uuid: "GPU-b200b200-0000-0000-0000-000000000001" serial: "1562849103701" pci: - bus_id: "00000000:0B:00.0" + bus_id: "0000:0B:00.0" minor_number: 1 grace_cpu_pair: 0 # Same superchip as GPU 0 - index: 2 - uuid: "GPU-9b200000-0000-0000-0000-000000000002" + uuid: "GPU-b200b200-0000-0000-0000-000000000002" serial: "1562849103702" pci: - bus_id: "00000000:4A:00.0" + bus_id: "0000:4A:00.0" minor_number: 2 grace_cpu_pair: 1 - index: 3 - uuid: "GPU-9b200000-0000-0000-0000-000000000003" + uuid: "GPU-b200b200-0000-0000-0000-000000000003" serial: "1562849103703" pci: - bus_id: "00000000:4B:00.0" + bus_id: "0000:4B:00.0" minor_number: 3 grace_cpu_pair: 1 - index: 4 - uuid: "GPU-9b200000-0000-0000-0000-000000000004" + uuid: "GPU-b200b200-0000-0000-0000-000000000004" serial: "1562849103704" pci: - bus_id: "00000000:8A:00.0" + bus_id: "0000:8A:00.0" minor_number: 4 grace_cpu_pair: 2 - index: 5 - uuid: "GPU-9b200000-0000-0000-0000-000000000005" + uuid: "GPU-b200b200-0000-0000-0000-000000000005" serial: "1562849103705" pci: - bus_id: "00000000:8B:00.0" + bus_id: "0000:8B:00.0" minor_number: 5 grace_cpu_pair: 2 - index: 6 - uuid: "GPU-9b200000-0000-0000-0000-000000000006" + uuid: 
"GPU-b200b200-0000-0000-0000-000000000006" serial: "1562849103706" pci: - bus_id: "00000000:CA:00.0" + bus_id: "0000:CA:00.0" minor_number: 6 grace_cpu_pair: 3 - index: 7 - uuid: "GPU-9b200000-0000-0000-0000-000000000007" + uuid: "GPU-b200b200-0000-0000-0000-000000000007" serial: "1562849103707" pci: - bus_id: "00000000:CB:00.0" + bus_id: "0000:CB:00.0" minor_number: 7 grace_cpu_pair: 3 @@ -1209,12 +1303,547 @@ data: - link: 0 state: "active" remote_device_type: "gpu" - remote_pci_bus_id: "00000000:0B:00.0" + remote_pci_bus_id: "0000:0B:00.0" + - link: 1 + state: "active" + remote_device_type: "cpu" # NVLink-C2C to Grace + remote_pci_bus_id: "N/A" + # ... define all 18 links for full topology + + # ============================================================================= + # InfiniBand topology - 1 ConnectX-7 NDR HCA per GPU (typical GB200 NVL72) + # ============================================================================= + infiniband: + enabled: true + hca_type: "MT4129" + fw_version: "28.40.1000" + hw_rev: "0x0" + board_id: "MT_0000000838" + link_layer: "InfiniBand" + rate_gbps: 400 + port_state: "ACTIVE" + phys_state: "LinkUp" + hcas_per_gpu: 1 + guid_prefix: "9b88c2:0300:ab" + node_desc_template: "{node_name} mlx5_{idx}" + + # ============================================================================= + # PCIe topology - GB200, 4 Grace CPU pairs -> 4 NUMA nodes, 2 GPUs each. + # Consumed by `render-pci-sysfs` to materialize a fake /sys/bus/pci tree + # under MOCK_PCI_ROOT; the NVIDIA DRA driver resolves `dra.k8s.io/pcieRoot` + # from these symlinks. 
+ # ============================================================================= + pcie_topology: + root_complexes: + - id: "pci0000:00" + numa_node: 0 + devices: + - "0000:0A:00.0" + - "0000:0B:00.0" + - id: "pci0000:40" + numa_node: 1 + devices: + - "0000:4A:00.0" + - "0000:4B:00.0" + - id: "pci0000:80" + numa_node: 2 + devices: + - "0000:8A:00.0" + - "0000:8B:00.0" + - id: "pci0000:c0" + numa_node: 3 + devices: + - "0000:CA:00.0" + - "0000:CB:00.0" +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: gpu-profile-gb300 + labels: + fake-gpu-operator/gpu-profile: "true" +data: + profile.yaml: | + # Mock NVML Configuration: GB300 NVL (Grace-Blackwell Ultra Superchip) + # Full configuration for nvidia-smi -x -q compatibility + # Use: export MOCK_NVML_CONFIG=/path/to/this/file.yaml + + version: "1.0" + + # ============================================================================= + # System-level configuration + # ============================================================================= + system: + driver_version: "570.124.06" + nvml_version: "12.570.124.06" + cuda_version: "12.8" + cuda_version_major: 12 + cuda_version_minor: 8 + + # ============================================================================= + # Default device configuration (applied to all devices unless overridden) + # ============================================================================= + device_defaults: + # --------------------------------------------------------------------------- + # Basic identification + # --------------------------------------------------------------------------- + name: "NVIDIA GB300 NVL" + brand: "nvidia" + serial: "1573041103750" + board_part_number: "699-2G530-0300-000" + vbios_version: "97.00.41.00.01" + + # --------------------------------------------------------------------------- + # Architecture - Blackwell Ultra (still sm_100 family for compute_capability, + # but with higher TFLOPS per SM than the standard Blackwell B200/GB200) + # 
--------------------------------------------------------------------------- + architecture: "blackwell" + compute_capability: + major: 10 + minor: 0 + num_gpu_cores: 21632 # Blackwell Ultra: more cores than B200/GB200 + + # --------------------------------------------------------------------------- + # NVLink fabric (GB300 ComputeDomain). The clique_id / cluster_uuid below + # are placeholder defaults; the topology-overlay step in the engine + # rewrites them per-node based on the cluster-level topology ConfigMap + # (set Helm value `topology.enabled=true`). Nodes that don't appear in + # the topology fall through to these defaults, which is sufficient for + # single-node experiments. See pkg/gpu/mocknvml/engine/fabric.go. + # --------------------------------------------------------------------------- + fabric: + cluster_uuid: "00000000-0000-0000-0000-000000000001" + clique_id: 0 + state: "completed" + health_mask: 0 + + # --------------------------------------------------------------------------- + # InfoROM versions + # --------------------------------------------------------------------------- + inforom: + image_version: "G530.0300.00.01" + oem_object: "2.1" + ecc_object: "7.20" + pwr_object: "1.0" + + # --------------------------------------------------------------------------- + # Memory configuration - 288 GiB HBM3e per GPU (1.5x GB200's 192 GiB) + # --------------------------------------------------------------------------- + memory: + total_bytes: 309237645312 # 288 GiB HBM3e + reserved_bytes: 1610612736 # ~1.5 GiB reserved + free_bytes: 307627032576 # total - reserved at idle + used_bytes: 0 + memory_bus_width: 8192 + + bar1_memory: + total_bytes: 824633720832 # 768 GiB (unified memory with Grace, grown for B300) + free_bytes: 824633720832 + used_bytes: 0 + + # --------------------------------------------------------------------------- + # PCI configuration + # --------------------------------------------------------------------------- + pci: + device_id: 
0x294110DE # GB300 NVL (mock placeholder, Blackwell Ultra) + subsystem_id: 0x183010DE + + pcie: + max_link_gen: 6 # PCIe Gen6 (or NVLink-C2C to Grace) + current_link_gen: 6 + max_link_width: 16 # x16 + current_link_width: 16 + replay_counter: 0 + tx_throughput_kbps: 0 + rx_throughput_kbps: 0 + + # --------------------------------------------------------------------------- + # Power configuration - GB300 is higher TDP than GB200 + # (B300 GPU alone advertised at up to 1.4 kW; full superchip headroom higher) + # --------------------------------------------------------------------------- + power: + management_supported: true + management_mode: "enabled" + default_limit_mw: 1400000 # 1400W per B300 GPU + enforced_limit_mw: 1400000 + min_limit_mw: 500000 # 500W minimum + max_limit_mw: 1600000 # 1600W boost ceiling + current_draw_mw: 175000 # 175W idle (liquid cooled) + power_state: "P0" + total_energy_consumption_mj: 1000000 # millijoules since boot + + # --------------------------------------------------------------------------- + # Thermal configuration (same liquid-cooled envelope as GB200) + # --------------------------------------------------------------------------- + thermal: + temperature_gpu_c: 38 # Idle temperature (liquid cooled) + temperature_memory_c: 36 + shutdown_threshold_c: 95 + slowdown_threshold_c: 90 + max_operating_c: 85 + target_temperature_c: 85 + + # --------------------------------------------------------------------------- + # Fan configuration (N/A - liquid cooled) + # --------------------------------------------------------------------------- + fan: + count: 0 # Liquid cooled, no fans + speed_percent: "N/A" + target_speed_percent: "N/A" + + # --------------------------------------------------------------------------- + # Clock speeds (MHz) - Blackwell Ultra pushes boost slightly higher + # --------------------------------------------------------------------------- + clocks: + graphics_current: 345 # Idle + graphics_max: 2200 # Boost + 
graphics_app: 2200 + graphics_app_default: 2200 + sm_current: 345 + sm_max: 2200 + memory_current: 2625 # HBM3e 8 Gbps (1.05x GB200's 2500) + memory_max: 2625 + memory_app: 2625 + memory_app_default: 2625 + video_current: 1200 + video_max: 2200 + + # --------------------------------------------------------------------------- + # Clocks throttle reasons + # --------------------------------------------------------------------------- + clocks_throttle_reasons: + gpu_idle: true + applications_clocks_setting: false + sw_power_cap: false + hw_slowdown: false + hw_thermal_slowdown: false + hw_power_brake_slowdown: false + sync_boost: false + sw_thermal_slowdown: false + display_clocks_setting: false + + # --------------------------------------------------------------------------- + # Supported clocks + # --------------------------------------------------------------------------- + supported_clocks: + memory_clocks: + - freq_mhz: 2625 + graphics_clocks: [345, 690, 1035, 1380, 1725, 1980, 2200] + + # --------------------------------------------------------------------------- + # Performance state + # --------------------------------------------------------------------------- + performance_state: "P0" + + # --------------------------------------------------------------------------- + # Utilization (percentage, 0-100) + # --------------------------------------------------------------------------- + utilization: + gpu: 0 + memory: 0 + encoder: 0 + decoder: 0 + jpeg: 0 + ofa: 0 + + # --------------------------------------------------------------------------- + # Encoder/Decoder statistics + # --------------------------------------------------------------------------- + encoder_stats: + session_count: 0 + average_fps: 0 + average_latency_us: 0 + + fbc_stats: + session_count: 0 + average_fps: 0 + average_latency_us: 0 + + # --------------------------------------------------------------------------- + # ECC configuration + # 
--------------------------------------------------------------------------- + ecc: + mode_current: "enabled" + mode_pending: "enabled" + default_mode: "enabled" + errors: + volatile: + single_bit: + device_memory: 0 + l1_cache: 0 + l2_cache: 0 + register_file: 0 + texture_memory: 0 + total: 0 + double_bit: + device_memory: 0 + l1_cache: 0 + l2_cache: 0 + register_file: 0 + texture_memory: 0 + total: 0 + aggregate: + single_bit: + device_memory: 0 + l1_cache: 0 + l2_cache: 0 + register_file: 0 + texture_memory: 0 + total: 0 + double_bit: + device_memory: 0 + l1_cache: 0 + l2_cache: 0 + register_file: 0 + texture_memory: 0 + total: 0 + + # --------------------------------------------------------------------------- + # Retired pages + # --------------------------------------------------------------------------- + retired_pages: + single_bit_retirement: + count: 0 + addresses: [] + double_bit_retirement: + count: 0 + addresses: [] + pending_blacklist: false + pending_retirement: false + + # --------------------------------------------------------------------------- + # Remapped rows + # --------------------------------------------------------------------------- + remapped_rows: + correctable: 0 + uncorrectable: 0 + pending: false + failure_occurred: false + + # --------------------------------------------------------------------------- + # Display configuration + # --------------------------------------------------------------------------- + display: + mode: "disabled" + active: "disabled" + + # --------------------------------------------------------------------------- + # Persistence mode + # --------------------------------------------------------------------------- + persistence_mode: "enabled" + + # --------------------------------------------------------------------------- + # Compute mode + # --------------------------------------------------------------------------- + compute_mode: "default" + + # 
--------------------------------------------------------------------------- + # MIG configuration - GB300 supports MIG + # --------------------------------------------------------------------------- + mig: + mode_current: "disabled" + mode_pending: "disabled" + max_gpu_instances: 7 + + # --------------------------------------------------------------------------- + # GPU operation mode (GOM) + # --------------------------------------------------------------------------- + gpu_operation_mode: + current: "all_on" + pending: "all_on" + + # --------------------------------------------------------------------------- + # Driver model (Windows only) + # --------------------------------------------------------------------------- + driver_model: + current: "N/A" + pending: "N/A" + + # --------------------------------------------------------------------------- + # Accounting mode + # --------------------------------------------------------------------------- + accounting: + mode: "disabled" + buffer_size: 4000 + + # --------------------------------------------------------------------------- + # Virtualization + # --------------------------------------------------------------------------- + virtualization: + mode: "none" + host_vgpu_mode: "N/A" + + # --------------------------------------------------------------------------- + # GSP Firmware + # --------------------------------------------------------------------------- + gsp_firmware: + mode: "enabled" + version: "570.124.06" + + # --------------------------------------------------------------------------- + # Blackwell Ultra-specific features + # --------------------------------------------------------------------------- + features: + transformer_engine: true # 2nd gen Transformer Engine + fp4_support: true # FP4 precision support + fp6_support: true # FP6 precision (Blackwell Ultra) + fp8_support: true # FP8 precision support + confidential_compute: true # CC/TEE support + nvlink_c2c: true # NVLink Chip-to-Chip (Grace 
connection) + decompression_engine: true # Hardware decompression + fifth_gen_tensor_cores: true # 5th generation tensor cores + + # --------------------------------------------------------------------------- + # Grace CPU pairing (GB300 superchip) + # --------------------------------------------------------------------------- + grace_superchip: + enabled: true + cpu_cores: 72 # Grace CPU cores (same Grace as GB200) + cpu_memory_gb: 480 # LPDDR5X per Grace CPU + coherent_memory: true # NVLink-C2C coherent memory + + # --------------------------------------------------------------------------- + # Processes (empty at idle) + # --------------------------------------------------------------------------- + processes: [] + + # ============================================================================= + # Device-specific overrides + # GB300 NVL72 has 72 GPUs, but we define 8 for testing (4 superchip trays, + # each with 1 Grace CPU + 2 B300 GPUs, matching the GB200 layout). + # ============================================================================= + devices: + - index: 0 + uuid: "GPU-b300b300-0000-0000-0000-000000000000" + serial: "1573041103700" + pci: + bus_id: "0000:0A:00.0" + minor_number: 0 + grace_cpu_pair: 0 # Paired with Grace CPU 0 + + - index: 1 + uuid: "GPU-b300b300-0000-0000-0000-000000000001" + serial: "1573041103701" + pci: + bus_id: "0000:0B:00.0" + minor_number: 1 + grace_cpu_pair: 0 # Same superchip as GPU 0 + + - index: 2 + uuid: "GPU-b300b300-0000-0000-0000-000000000002" + serial: "1573041103702" + pci: + bus_id: "0000:4A:00.0" + minor_number: 2 + grace_cpu_pair: 1 + + - index: 3 + uuid: "GPU-b300b300-0000-0000-0000-000000000003" + serial: "1573041103703" + pci: + bus_id: "0000:4B:00.0" + minor_number: 3 + grace_cpu_pair: 1 + + - index: 4 + uuid: "GPU-b300b300-0000-0000-0000-000000000004" + serial: "1573041103704" + pci: + bus_id: "0000:8A:00.0" + minor_number: 4 + grace_cpu_pair: 2 + + - index: 5 + uuid: 
"GPU-b300b300-0000-0000-0000-000000000005" + serial: "1573041103705" + pci: + bus_id: "0000:8B:00.0" + minor_number: 5 + grace_cpu_pair: 2 + + - index: 6 + uuid: "GPU-b300b300-0000-0000-0000-000000000006" + serial: "1573041103706" + pci: + bus_id: "0000:CA:00.0" + minor_number: 6 + grace_cpu_pair: 3 + + - index: 7 + uuid: "GPU-b300b300-0000-0000-0000-000000000007" + serial: "1573041103707" + pci: + bus_id: "0000:CB:00.0" + minor_number: 7 + grace_cpu_pair: 3 + + # ============================================================================= + # NVLink topology - GB300 uses NVLink 5.0 (same fabric as GB200) + # ============================================================================= + nvlink: + version: 5 + links_per_gpu: 18 + bandwidth_per_link_gbps: 100 # 100 GB/s per link = 1.8 TB/s total per GPU + switch_support: true # NVLink Switch for scale-out + switch_count: 0 # Set for NVL72 configurations (up to 9 switches) + c2c_enabled: true # NVLink-C2C to Grace CPU + # NVLink state per link (18 links) + links: + - link: 0 + state: "active" + remote_device_type: "gpu" + remote_pci_bus_id: "0000:0B:00.0" - link: 1 state: "active" remote_device_type: "cpu" # NVLink-C2C to Grace remote_pci_bus_id: "N/A" # ... 
define all 18 links for full topology + + # ============================================================================= + # InfiniBand topology - 1 ConnectX-7 NDR HCA per GPU (typical GB300 NVL72) + # ============================================================================= + infiniband: + enabled: true + hca_type: "MT4129" + fw_version: "28.43.1000" + hw_rev: "0x0" + board_id: "MT_0000000838" + link_layer: "InfiniBand" + rate_gbps: 400 + port_state: "ACTIVE" + phys_state: "LinkUp" + hcas_per_gpu: 1 + guid_prefix: "9b88c2:0301:cd" + node_desc_template: "{node_name} mlx5_{idx}" + + # ============================================================================= + # PCIe topology (consumed by render-pci-sysfs to materialize + # /sys/bus/pci/devices/ -> ../../../devices/pciDDDD:BB/ symlinks + # and per-device numa_node files). Mirrors the GB300 NVL superchip layout: + # one PCI root complex per Grace+2×B300 tray, NUMA-aligned per tray. + # ============================================================================= + pcie_topology: + root_complexes: + - id: "pci0000:00" + numa_node: 0 + devices: + - "0000:0A:00.0" + - "0000:0B:00.0" + - id: "pci0000:40" + numa_node: 1 + devices: + - "0000:4A:00.0" + - "0000:4B:00.0" + - id: "pci0000:80" + numa_node: 2 + devices: + - "0000:8A:00.0" + - "0000:8B:00.0" + - id: "pci0000:c0" + numa_node: 3 + devices: + - "0000:CA:00.0" + - "0000:CB:00.0" --- apiVersion: v1 kind: ConfigMap @@ -1542,59 +2171,59 @@ data: # ============================================================================= devices: - index: 0 - uuid: "GPU-f1000000-0000-0000-0000-000000000000" + uuid: "GPU-01000100-0000-0000-0000-000000000000" serial: "1562821083900" pci: - bus_id: "00000000:1A:00.0" + bus_id: "0000:1A:00.0" minor_number: 0 - index: 1 - uuid: "GPU-f1000000-0000-0000-0000-000000000001" + uuid: "GPU-01000100-0000-0000-0000-000000000001" serial: "1562821083901" pci: - bus_id: "00000000:1B:00.0" + bus_id: "0000:1B:00.0" minor_number: 1 - index: 2 - 
uuid: "GPU-f1000000-0000-0000-0000-000000000002" + uuid: "GPU-01000100-0000-0000-0000-000000000002" serial: "1562821083902" pci: - bus_id: "00000000:4A:00.0" + bus_id: "0000:4A:00.0" minor_number: 2 - index: 3 - uuid: "GPU-f1000000-0000-0000-0000-000000000003" + uuid: "GPU-01000100-0000-0000-0000-000000000003" serial: "1562821083903" pci: - bus_id: "00000000:4B:00.0" + bus_id: "0000:4B:00.0" minor_number: 3 - index: 4 - uuid: "GPU-f1000000-0000-0000-0000-000000000004" + uuid: "GPU-01000100-0000-0000-0000-000000000004" serial: "1562821083904" pci: - bus_id: "00000000:8A:00.0" + bus_id: "0000:8A:00.0" minor_number: 4 - index: 5 - uuid: "GPU-f1000000-0000-0000-0000-000000000005" + uuid: "GPU-01000100-0000-0000-0000-000000000005" serial: "1562821083905" pci: - bus_id: "00000000:8B:00.0" + bus_id: "0000:8B:00.0" minor_number: 5 - index: 6 - uuid: "GPU-f1000000-0000-0000-0000-000000000006" + uuid: "GPU-01000100-0000-0000-0000-000000000006" serial: "1562821083906" pci: - bus_id: "00000000:CA:00.0" + bus_id: "0000:CA:00.0" minor_number: 6 - index: 7 - uuid: "GPU-f1000000-0000-0000-0000-000000000007" + uuid: "GPU-01000100-0000-0000-0000-000000000007" serial: "1562821083907" pci: - bus_id: "00000000:CB:00.0" + bus_id: "0000:CB:00.0" minor_number: 7 # ============================================================================= @@ -1608,7 +2237,47 @@ data: - link: 0 state: "active" remote_device_type: "gpu" - remote_pci_bus_id: "00000000:1B:00.0" + remote_pci_bus_id: "0000:1B:00.0" + + # ============================================================================= + # InfiniBand topology - 1 ConnectX-7 NDR HCA per GPU (typical HGX H100) + # ============================================================================= + infiniband: + enabled: true + hca_type: "MT4129" # ConnectX-7 + fw_version: "28.39.2048" + hw_rev: "0x0" + board_id: "MT_0000000838" + link_layer: "InfiniBand" + rate_gbps: 400 # NDR + port_state: "ACTIVE" + phys_state: "LinkUp" + hcas_per_gpu: 1 # total HCAs = 
gpu.count * hcas_per_gpu + guid_prefix: "a088c2:0300:ab" # last byte = HCA index + node_desc_template: "{node_name} mlx5_{idx}" + + # ============================================================================= + # PCIe topology - HGX H100, 2 NUMA nodes, 4 GPUs each. + # Consumed by `render-pci-sysfs` to materialize a fake /sys/bus/pci tree + # under MOCK_PCI_ROOT; the NVIDIA DRA driver resolves `dra.k8s.io/pcieRoot` + # from these symlinks. + # ============================================================================= + pcie_topology: + root_complexes: + - id: "pci0000:00" + numa_node: 0 + devices: + - "0000:1A:00.0" + - "0000:1B:00.0" + - "0000:4A:00.0" + - "0000:4B:00.0" + - id: "pci0000:80" + numa_node: 1 + devices: + - "0000:8A:00.0" + - "0000:8B:00.0" + - "0000:CA:00.0" + - "0000:CB:00.0" --- apiVersion: v1 kind: ConfigMap @@ -1928,60 +2597,90 @@ data: # ============================================================================= devices: - index: 0 - uuid: "GPU-1d400000-0000-0000-0000-000000000000" + uuid: "GPU-14050000-0000-0000-0000-000000000000" serial: "1562830094500" pci: - bus_id: "00000000:17:00.0" + bus_id: "0000:17:00.0" minor_number: 0 - index: 1 - uuid: "GPU-1d400000-0000-0000-0000-000000000001" + uuid: "GPU-14050000-0000-0000-0000-000000000001" serial: "1562830094501" pci: - bus_id: "00000000:31:00.0" + bus_id: "0000:31:00.0" minor_number: 1 - index: 2 - uuid: "GPU-1d400000-0000-0000-0000-000000000002" + uuid: "GPU-14050000-0000-0000-0000-000000000002" serial: "1562830094502" pci: - bus_id: "00000000:B1:00.0" + bus_id: "0000:B1:00.0" minor_number: 2 - index: 3 - uuid: "GPU-1d400000-0000-0000-0000-000000000003" + uuid: "GPU-14050000-0000-0000-0000-000000000003" serial: "1562830094503" pci: - bus_id: "00000000:CA:00.0" + bus_id: "0000:CA:00.0" minor_number: 3 - index: 4 - uuid: "GPU-1d400000-0000-0000-0000-000000000004" + uuid: "GPU-14050000-0000-0000-0000-000000000004" serial: "1562830094504" pci: - bus_id: "00000000:18:00.0" + bus_id: 
"0000:18:00.0" minor_number: 4 - index: 5 - uuid: "GPU-1d400000-0000-0000-0000-000000000005" + uuid: "GPU-14050000-0000-0000-0000-000000000005" serial: "1562830094505" pci: - bus_id: "00000000:32:00.0" + bus_id: "0000:32:00.0" minor_number: 5 - index: 6 - uuid: "GPU-1d400000-0000-0000-0000-000000000006" + uuid: "GPU-14050000-0000-0000-0000-000000000006" serial: "1562830094506" pci: - bus_id: "00000000:B2:00.0" + bus_id: "0000:B2:00.0" minor_number: 6 - index: 7 - uuid: "GPU-1d400000-0000-0000-0000-000000000007" + uuid: "GPU-14050000-0000-0000-0000-000000000007" serial: "1562830094507" pci: - bus_id: "00000000:CB:00.0" + bus_id: "0000:CB:00.0" minor_number: 7 + + # ============================================================================= + # InfiniBand topology - L40S systems typically ship without IB HCAs. + # Set enabled=true via Helm values to simulate one if needed. + # ============================================================================= + infiniband: + enabled: false + + # ============================================================================= + # PCIe topology - L40S, 2 NUMA nodes, 4 GPUs each. + # Consumed by `render-pci-sysfs` to materialize a fake /sys/bus/pci tree + # under MOCK_PCI_ROOT; the NVIDIA DRA driver resolves `dra.k8s.io/pcieRoot` + # from these symlinks. 
+ # ============================================================================= + pcie_topology: + root_complexes: + - id: "pci0000:00" + numa_node: 0 + devices: + - "0000:17:00.0" + - "0000:18:00.0" + - "0000:31:00.0" + - "0000:32:00.0" + - id: "pci0000:80" + numa_node: 1 + devices: + - "0000:B1:00.0" + - "0000:B2:00.0" + - "0000:CA:00.0" + - "0000:CB:00.0" --- apiVersion: v1 kind: ConfigMap @@ -2301,30 +3000,52 @@ data: # ============================================================================= devices: - index: 0 - uuid: "GPU-00040000-0000-0000-0000-000000000000" + uuid: "GPU-00000074-0000-0000-0000-000000000000" serial: "0421819085400" pci: - bus_id: "00000000:3B:00.0" + bus_id: "0000:3B:00.0" minor_number: 0 - index: 1 - uuid: "GPU-00040000-0000-0000-0000-000000000001" + uuid: "GPU-00000074-0000-0000-0000-000000000001" serial: "0421819085401" pci: - bus_id: "00000000:86:00.0" + bus_id: "0000:86:00.0" minor_number: 1 - index: 2 - uuid: "GPU-00040000-0000-0000-0000-000000000002" + uuid: "GPU-00000074-0000-0000-0000-000000000002" serial: "0421819085402" pci: - bus_id: "00000000:AF:00.0" + bus_id: "0000:AF:00.0" minor_number: 2 - index: 3 - uuid: "GPU-00040000-0000-0000-0000-000000000003" + uuid: "GPU-00000074-0000-0000-0000-000000000003" serial: "0421819085403" pci: - bus_id: "00000000:D8:00.0" + bus_id: "0000:D8:00.0" minor_number: 3 + + # ============================================================================= + # InfiniBand topology - T4 systems do not ship with IB HCAs. + # ============================================================================= + infiniband: + enabled: false + + # ============================================================================= + # PCIe topology - T4 inference card, single NUMA node, 4 GPUs. + # Consumed by `render-pci-sysfs` to materialize a fake /sys/bus/pci tree + # under MOCK_PCI_ROOT; the NVIDIA DRA driver resolves `dra.k8s.io/pcieRoot` + # from these symlinks. 
+ # ============================================================================= + pcie_topology: + root_complexes: + - id: "pci0000:00" + numa_node: 0 + devices: + - "0000:3B:00.0" + - "0000:86:00.0" + - "0000:AF:00.0" + - "0000:D8:00.0" {{- end }} diff --git a/hack/sync-profiles.sh b/hack/sync-profiles.sh index b2694e54..dbd1dbb5 100755 --- a/hack/sync-profiles.sh +++ b/hack/sync-profiles.sh @@ -4,13 +4,15 @@ # the built-in profile ConfigMaps for the fake-gpu-operator Helm chart. # # Usage: -# hack/sync-profiles.sh # use default pinned version -# hack/sync-profiles.sh v0.2.0 # override version +# hack/sync-profiles.sh # sync from upstream main (default) +# hack/sync-profiles.sh v0.2.0 # override with a tag, branch, or commit SHA # set -euo pipefail REPO="https://github.com/NVIDIA/k8s-test-infra.git" -DEFAULT_VERSION="v0.1.0" +# Default to main; the committed builtin.yaml + the header SHA are the pin. +# pcie_topology is main-only for now (no release has it). Override with a ref. +DEFAULT_VERSION="main" VERSION="${1:-$DEFAULT_VERSION}" UPSTREAM_PATH="deployments/nvml-mock/helm/nvml-mock/profiles" @@ -22,11 +24,14 @@ TMPDIR="$(mktemp -d)" trap 'rm -rf "$TMPDIR"' EXIT echo "Cloning NVIDIA/k8s-test-infra @ ${VERSION} ..." -git -c advice.detachedHead=false clone --depth 1 --branch "$VERSION" \ +# Blobless+sparse clone then checkout, so VERSION may be a tag, branch, or SHA. +git -c advice.detachedHead=false clone \ --filter=blob:none --sparse "$REPO" "$TMPDIR/upstream" 2>&1 \ - | grep -v "^Updating files" + | grep -v "^Updating files" || true cd "$TMPDIR/upstream" git sparse-checkout set "$UPSTREAM_PATH" 2>/dev/null +git -c advice.detachedHead=false checkout --quiet "$VERSION" # fail fast under set -e: an unresolvable ref must not fall through to the default branch +RESOLVED_SHA="$(git rev-parse --short HEAD)" PROFILES_DIR="$TMPDIR/upstream/$UPSTREAM_PATH" if [ ! 
-d "$PROFILES_DIR" ] || [ -z "$(ls "$PROFILES_DIR"/*.yaml 2>/dev/null)" ]; then @@ -41,7 +46,7 @@ FIRST=true echo '{{- if .Values.builtinProfiles.enabled }}' >> "$OUTPUT_FILE" echo "# Auto-generated by hack/sync-profiles.sh — do not edit manually." >> "$OUTPUT_FILE" -echo "# Source: NVIDIA/k8s-test-infra ${VERSION}" >> "$OUTPUT_FILE" +echo "# Source: NVIDIA/k8s-test-infra ${VERSION} (commit ${RESOLVED_SHA})" >> "$OUTPUT_FILE" for profile_file in "$PROFILES_DIR"/*.yaml; do name="$(basename "$profile_file" .yaml)"