diff --git a/.github/workflows/ci-cas-security-dockerfile.yml b/.github/workflows/ci-cas-security-dockerfile.yml new file mode 100644 index 00000000..5c1fef80 --- /dev/null +++ b/.github/workflows/ci-cas-security-dockerfile.yml @@ -0,0 +1,38 @@ +name: Security Scanning (dockerfile) + +on: + schedule: + # Every 2 months on 1st at midnight UTC + - cron: '0 0 1 */2 *' + # Manual trigger for testing + workflow_dispatch: + +jobs: + security: + runs-on: ["innersource.prod.amr.dind"] + if: ${{ github.repository != 'intel/pcm' }} + + permissions: + contents: read + pull-requests: write + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: false + + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: CAS Security Orchestrator (Source Only) + uses: intel-innersource/applications.security.monitoring.cas@v2 + with: + sdl-api-key: ${{ secrets.SDL_API_KEY }} + sdl-project-id: ${{ secrets.SDL_PROJECT_ID }} + sdl-idsid-value: ${{ secrets.SDL_IDSID_VALUE }} + # Scan for SDL419 only (workaround), SDL441 has separate scan + sdl-tasks: "SDL419" + # Set branch type to 'release' for scheduled runs and manual trigger on main branch. + # Required for automatic evidence submit. Excludes push events. 
+ branch-type: ${{ github.ref == 'refs/heads/main' && (github.event_name == 'workflow_dispatch' || github.event_name == 'schedule') && 'release' || 'dev' }} \ No newline at end of file diff --git a/.github/workflows/ci-cas-security.yml b/.github/workflows/ci-cas-security.yml new file mode 100644 index 00000000..71d6312f --- /dev/null +++ b/.github/workflows/ci-cas-security.yml @@ -0,0 +1,43 @@ +name: Security Scanning (Source Code) + +on: + schedule: + # Every 2 months on 1st at midnight UTC + - cron: '0 0 1 */2 *' + # Manual trigger for testing + workflow_dispatch: + +jobs: + security: + runs-on: ["innersource.prod.amr.dind"] + if: ${{ github.repository != 'intel/pcm' }} + + permissions: + contents: read + pull-requests: write + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: recursive + + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install Python dependencies for SCA scanning + run: | + pip install -r perfmon/requirements.txt || true + pip install -r perfmon/scripts/ci/verify_mapfile/requirements.txt || true + pip install -r Intel-PMT/tools/docker/requirements.txt || true + + - name: CAS Security Orchestrator (Source Only) + uses: intel-innersource/applications.security.monitoring.cas@v2 + with: + sdl-api-key: ${{ secrets.SDL_API_KEY }} + sdl-project-id: ${{ secrets.SDL_PROJECT_ID }} + sdl-idsid-value: ${{ secrets.SDL_IDSID_VALUE }} + sdl-tasks: "SDL441" # scan for SDL441 only, SDL419 has separate scan (workaround) + # Set branch type to 'release' for scheduled runs and manual trigger on main branch. + # Required for automatic evidence submit. Excludes push events. 
+ branch-type: ${{ github.ref == 'refs/heads/main' && (github.event_name == 'workflow_dispatch' || github.event_name == 'schedule') && 'release' || 'dev' }} diff --git a/.github/workflows/ci-fuzz-micro.yml b/.github/workflows/ci-fuzz-micro.yml index eb08e66e..9dc418de 100644 --- a/.github/workflows/ci-fuzz-micro.yml +++ b/.github/workflows/ci-fuzz-micro.yml @@ -41,7 +41,9 @@ jobs: - name: upload-artifact uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 with: - name: fuzz-log-${{ github.sha }} - path: "build/fuzz-log.txt" + name: fuzz-evidence-${{ github.sha }} + path: | + build/fuzz-log.txt + build/report.txt diff --git a/.github/workflows/ci-fuzz.yml b/.github/workflows/ci-fuzz.yml index 0bcc2eea..5eb21cd1 100644 --- a/.github/workflows/ci-fuzz.yml +++ b/.github/workflows/ci-fuzz.yml @@ -43,7 +43,9 @@ jobs: - name: upload-artifact uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 with: - name: fuzz-log-${{ github.sha }} - path: "build/fuzz-log.txt" + name: fuzz-evidence-${{ github.sha }} + path: | + build/fuzz-log.txt + build/report.txt diff --git a/.github/workflows/ci-windows.yml b/.github/workflows/ci-windows.yml index 41a64b6a..8d25bb15 100644 --- a/.github/workflows/ci-windows.yml +++ b/.github/workflows/ci-windows.yml @@ -8,6 +8,7 @@ on: env: BUILD_TYPE: Release + OPENSSL_ROOT_DIR: "C:\\Program Files\\OpenSSL-Win64" permissions: contents: read @@ -30,7 +31,11 @@ jobs: - name: Configure CMake run: | if (Test-Path ${{github.workspace}}\build){ Remove-Item ${{github.workspace}}\build -Recurse } - cmake -B ${{github.workspace}}\build + $cryptoLib = "$env:OPENSSL_ROOT_DIR\lib\VC\x64\MT\libcrypto_static.lib" + $sslLib = "$env:OPENSSL_ROOT_DIR\lib\VC\x64\MT\libssl_static.lib" + cmake -B ${{github.workspace}}\build ` + -DLIB_EAY_RELEASE:FILEPATH="$cryptoLib" ` + -DSSL_EAY_RELEASE:FILEPATH="$sslLib" - name: Build run: | cmake --build ${{github.workspace}}\build --config ${{env.BUILD_TYPE}} --parallel diff 
--git a/.gitignore b/.gitignore index 8aade9ba..8fac71fc 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,4 @@ build src/simdjson .vscode/ _codeql_build_dir/ +tests/numa_test diff --git a/Dockerfile b/Dockerfile index ea9b1fea..8e4439a7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,13 +4,26 @@ FROM fedora:43@sha256:6cd815d862109208adf6040ea13391fe6aeb87a9dc80735c2ab07083fd # Copyright (c) 2020-2024 Intel Corporation RUN dnf -y install gcc-c++ git findutils make cmake openssl openssl-devel libasan libasan-static hwdata + COPY . /tmp/pcm -RUN cd /tmp/pcm && mkdir build && cd build && cmake -DPCM_NO_STATIC_LIBASAN=OFF .. && make -j +WORKDIR /tmp/pcm/build +RUN cmake -DPCM_NO_STATIC_LIBASAN=OFF .. && make -j FROM fedora:43@sha256:6cd815d862109208adf6040ea13391fe6aeb87a9dc80735c2ab07083fdf5e03a + COPY --from=builder /tmp/pcm/build/bin/* /usr/local/bin/ COPY --from=builder /tmp/pcm/build/bin/opCode*.txt /usr/local/share/pcm/ COPY --from=builder /usr/share/hwdata/pci.ids /usr/share/hwdata/pci.ids ENV PCM_NO_PERF=1 -ENTRYPOINT [ "/usr/local/bin/pcm-sensor-server", "-p", "9738", "-r" ] +RUN useradd -m pcm-user + +# Allow pcm-user to run the server via sudo without a password +RUN echo "pcm-user ALL=(root) NOPASSWD: /usr/local/bin/pcm-sensor-server" >> /etc/sudoers + +USER pcm-user + +HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ + CMD sudo /usr/local/bin/pcm-sensor-server --help > /dev/null 2>&1 || exit 1 + +ENTRYPOINT [ "sudo", "/usr/local/bin/pcm-sensor-server", "-p", "9738", "-r" ] diff --git a/Intel-PMT b/Intel-PMT index 0c951602..8e57e182 160000 --- a/Intel-PMT +++ b/Intel-PMT @@ -1 +1 @@ -Subproject commit 0c9516029591fc70b15e3b7d95b9b14a9adf578f +Subproject commit 8e57e182feeaa55427199356c0b4c77bf92db89f diff --git a/doc/ENVVAR_README.md b/doc/ENVVAR_README.md index 3a93ced0..cba089ca 100644 --- a/doc/ENVVAR_README.md +++ b/doc/ENVVAR_README.md @@ -14,4 +14,6 @@ `PCM_ENFORCE_MBM=1` : force-enable Memory Bandwidth Monitoring (MBM) 
metrics (LocalMemoryBW = LMB) and (RemoteMemoryBW = RMB) on processors with RDT/MBM errata +`PCM_QUIET=1` : enable quiet mode for PCM initialization. In quiet mode, only error messages are output during PCM initialization, suppressing informational output such as processor information and topology details + `PCM_DEBUG_LEVEL=x` : x is an integer defining debug output level. level = 0 (default): minimal or no debug info, > 0 increases verbosity diff --git a/doc/NUMA_NODE_API.md b/doc/NUMA_NODE_API.md new file mode 100644 index 00000000..211ac96d --- /dev/null +++ b/doc/NUMA_NODE_API.md @@ -0,0 +1,119 @@ +# NUMA Node Location API for PCI Devices + +## Overview + +The `getNUMANode()` API allows you to retrieve the NUMA (Non-Uniform Memory Access) node location of a PCI device identified by its segment:bus:device:function coordinates. + +## Background + +- **PciHandle** and **PciHandleMM** classes are abstractions of PCI configuration space registers +- Each PCI device has a unique location: `segment:bus:device:function` +- **segment** is also known as **group number** or **domain** (synonyms: groupnr, groupnr_) + +## API Usage + +### Method Signature + +```cpp +int32 PciHandle::getNUMANode() const; +int32 PciHandleMM::getNUMANode() const; +``` + +### Return Value + +- **>= 0**: The NUMA node ID where the PCI device is located +- **-1**: NUMA information not available or not applicable + +### Example + +```cpp +#include "pci.h" + +using namespace pcm; + +// Open a PCI device at segment 0, bus 0, device 0, function 0 +PciHandleType handle(0, 0, 0, 0); + +// Get the NUMA node +int32 numa_node = handle.getNUMANode(); + +if (numa_node >= 0) { + std::cout << "Device is on NUMA node: " << numa_node << "\n"; +} else { + std::cout << "NUMA information not available\n"; +} +``` + +## Platform-Specific Implementation + +### Linux + +- **Method**: Reads from `/sys/bus/pci/devices/<segment>:<bus>:<device>.<function>/numa_node` +- **Fallback**: Also tries `/pcm/sys/bus/pci/devices/...` path +- **Return**: + - NUMA 
node ID (typically 0, 1, 2, ...) if available + - -1 if the file doesn't exist or can't be read + +### Windows + +- **Method**: Reads SRAT (System Resource Affinity Table) from ACPI firmware using `GetSystemFirmwareTable` API +- **Implementation**: + - Parses SRAT table to extract PCI Device Affinity structures (type 2) + - Builds a mapping from PCI device location (segment:bus:device:function) to NUMA node (proximity domain) + - Caches the mapping on first call for performance +- **Return**: + - NUMA node ID (proximity domain) if device is found in SRAT table + - -1 if SRAT table is not available or device is not listed +- **Requirements**: Windows Vista or later (for `GetSystemFirmwareTable` API) + +### FreeBSD / DragonFly + +- **Method**: Queries system via `sysctlbyname()` for NUMA domain information +- **Implementation**: + - First checks if NUMA is enabled via `vm.ndomains` sysctl + - Attempts to query PCI device-specific NUMA domain using multiple sysctl path formats + - Tries: `hw.pci.X.Y.Z.W.numa_domain` and `hw.pci.X:Y:Z.W.numa_domain` +- **Return**: + - NUMA node ID if available and system has NUMA enabled + - -1 if NUMA is disabled, not supported, or device affinity information unavailable +- **Note**: FreeBSD doesn't have a standardized sysctl path for PCI device NUMA affinity across all versions + +### macOS + +- **Method**: Returns -1 (macOS typically doesn't expose NUMA for PCI devices) +- **Return**: -1 (not applicable) + +## Use Cases + +1. **Performance Optimization**: Place processing threads on the same NUMA node as the device +2. **Memory Allocation**: Allocate buffers on the same NUMA node for optimal DMA performance +3. **System Topology Discovery**: Map out the relationship between PCI devices and NUMA nodes +4. 
**Monitoring and Analytics**: Identify cross-NUMA traffic patterns + +## Building the Example + +```bash +cd examples +g++ -std=c++11 -I../src numa_node_example.cpp -o numa_node_example -L../build/lib -lpcm -lpthread +LD_LIBRARY_PATH=../build/lib ./numa_node_example +``` + +## Notes + +- Requires appropriate permissions to access PCI configuration space +- On Linux, run with `sudo` or ensure `/sys/bus/pci` is accessible +- The NUMA node value is looked up at runtime; only the Windows implementation caches its SRAT-derived mapping after the first call +- A return value of -1 doesn't indicate an error; it means NUMA information is not available + +## Related APIs + +- `PciHandle::read32()` - Read 32-bit value from PCI configuration space +- `PciHandle::write32()` - Write 32-bit value to PCI configuration space +- `PciHandle::read64()` - Read 64-bit value from PCI configuration space +- `PciHandle::exists()` - Check if a PCI device exists + +## See Also + +- Linux kernel documentation: `Documentation/ABI/testing/sysfs-bus-pci` +- ACPI SRAT (System Resource Affinity Table) specification +- PCI Express Base Specification diff --git a/doc/WINDOWS_HOWTO.md b/doc/WINDOWS_HOWTO.md index 4ad1a03b..b4afc769 100644 --- a/doc/WINDOWS_HOWTO.md +++ b/doc/WINDOWS_HOWTO.md @@ -122,7 +122,7 @@ Starting from this release, **pcm-sensor-server** is now supported on Windows. T ### Running pcm-sensor-server on Windows -1. Choose or create a directory for PCM (e.g., `C:\Program Files\PCM\` or `C:\Program Files (x86)\PCM\`). Copy `msr.sys` and `pcm-sensor-server.exe` to this directory. +1. Create a directory for PCM in a protected location (e.g., `C:\Program Files\PCM\` or `C:\Program Files (x86)\PCM\`). Copy `msr.sys` and `pcm-sensor-server.exe` to this directory. **Important:** Do not place PCM binaries in user-writable directories (e.g., Downloads, Desktop, `C:\Users\Public\`) to prevent DLL planting attacks. 2. 
Run as Administrator (required for MSR access): ``` diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 30e11230..e9726a0c 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -21,4 +21,10 @@ if(UNIX) # create shared lib example add_executable(c_example_shlib ${EXAMPLE_FILE}) target_link_libraries(c_example_shlib PUBLIC PCM_SHARED PRIVATE Threads::Threads) -endif(UNIX) \ No newline at end of file + + # numa_to_socket_example + if(NOT APPLE) + add_executable(numa_to_socket_example numa_to_socket_example.cpp) + target_link_libraries(numa_to_socket_example PUBLIC PCM_SHARED PRIVATE Threads::Threads) + endif() +endif(UNIX) diff --git a/examples/numa_node_example.cpp b/examples/numa_node_example.cpp new file mode 100644 index 00000000..2d4f07e3 --- /dev/null +++ b/examples/numa_node_example.cpp @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2024, Intel Corporation +// Example: How to retrieve NUMA node location for PCI devices + +#include +#include +#include "pci.h" + +using namespace pcm; + +int main() +{ + std::cout << "Example: Retrieving NUMA node location for PCI devices\n"; + std::cout << "========================================================\n\n"; + + // Example 1: Get NUMA node for a specific PCI device + // Format: segment (or group):bus:device.function + uint32 segment = 0; // Also known as "domain" or "group" + uint32 bus = 0; + uint32 device = 0; + uint32 function = 0; + + try + { + // Create a handle to the PCI device + // On Linux: uses /proc/bus/pci/ or PciHandleMM for memory-mapped access + // On Windows: uses Windows driver + // On FreeBSD: uses /dev/pci + // On macOS: uses PCIDriver + PciHandleType handle(segment, bus, device, function); + + std::cout << "Successfully opened PCI device " + << segment << ":" << bus << ":" << device << "." 
<< function << "\n"; + + // Get the NUMA node location + int32 numa_node = handle.getNUMANode(); + + std::cout << "NUMA node: "; + if (numa_node >= 0) + { + std::cout << numa_node << "\n"; + } + else + { + std::cout << "Not available (return value: " << numa_node << ")\n"; + std::cout << "Note: -1 means NUMA information is not available on this platform\n"; + std::cout << " or the PCI device does not have NUMA node association.\n"; + } + + // You can also read PCI configuration space as usual + uint32 vendor_device_id = 0; + if (handle.read32(0, &vendor_device_id) == sizeof(uint32)) + { + uint32 vendor_id = vendor_device_id & 0xFFFF; + uint32 device_id = (vendor_device_id >> 16) & 0xFFFF; + std::cout << "\nPCI Device Info:\n"; + std::cout << " Vendor ID: 0x" << std::hex << std::setw(4) << std::setfill('0') + << vendor_id << "\n"; + std::cout << " Device ID: 0x" << std::setw(4) << std::setfill('0') + << device_id << std::dec << "\n"; + } + } + catch (const std::exception& e) + { + std::cerr << "Error: " << e.what() << "\n"; + std::cerr << "\nPossible reasons:\n"; + std::cerr << " - PCI device does not exist\n"; + std::cerr << " - Insufficient permissions (try running as root/administrator)\n"; + std::cerr << " - PCI subsystem not available on this platform\n"; + return 1; + } + + std::cout << "\n=== Example completed successfully ===\n"; + return 0; +} diff --git a/examples/numa_to_socket_example.cpp b/examples/numa_to_socket_example.cpp new file mode 100644 index 00000000..6f207015 --- /dev/null +++ b/examples/numa_to_socket_example.cpp @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2025, Intel Corporation +// Example: How to map NUMA node IDs to CPU socket IDs + +#include +#include +#include "../src/cpucounters.h" + +using namespace pcm; + +int main() +{ + std::cout << "Example: Mapping NUMA node IDs to CPU socket IDs\n"; + std::cout << "==================================================\n\n"; + + // Get PCM instance + PCM * m = 
PCM::getInstance(); + + // Initialize PCM + PCM::ErrorCode status = m->program(); + if (status != PCM::Success) + { + std::cerr << "Error: Cannot access CPU counters\n"; + std::cerr << "Try running as root/administrator\n"; + std::cerr << "Error code: " << status << "\n"; + return 1; + } + + std::cout << "System Information:\n"; + std::cout << "-------------------\n"; + std::cout << "Number of sockets: " << m->getNumSockets() << "\n"; + std::cout << "Number of cores: " << m->getNumCores() << "\n"; + std::cout << "Number of online cores: " << m->getNumOnlineCores() << "\n\n"; + + // Example: Map NUMA nodes to sockets + std::cout << "NUMA Node to Socket Mapping:\n"; + std::cout << "----------------------------\n"; + + // Try to map the first few NUMA nodes (typically 0-7 is sufficient) + const uint32 max_numa_nodes_to_check = 8; + bool found_any = false; + + for (uint32 numa_node = 0; numa_node < max_numa_nodes_to_check; ++numa_node) + { + int32 socket_id = m->mapNUMANodeToSocket(numa_node); + + if (socket_id >= 0) + { + std::cout << " NUMA node " << numa_node + << " -> Socket " << socket_id << "\n"; + found_any = true; + } + } + + if (!found_any) + { + std::cout << " No NUMA node mappings available\n"; + std::cout << "\nNote: This is normal on:\n"; + std::cout << " - Single-socket systems\n"; + std::cout << " - Systems without NUMA support\n"; + std::cout << " - macOS (not implemented)\n"; + std::cout << " - FreeBSD without NUMA enabled (vm.ndomains <= 1)\n"; + } + + std::cout << "\n"; + + // Example: Show relationship between cores and sockets + std::cout << "Core to Socket Mapping (first 8 cores):\n"; + std::cout << "----------------------------------------\n"; + const uint32 cores_to_show = std::min((uint32)8, m->getNumCores()); + for (uint32 core = 0; core < cores_to_show; ++core) + { + int32 socket = m->getSocketId(core); + std::cout << " Core " << core << " -> Socket " << socket << "\n"; + } + + std::cout << "\n=== Example completed successfully ===\n"; + + // 
Cleanup + m->cleanup(); + + return 0; +} diff --git a/scripts/bhs-power-mode.ps1 b/scripts/bhs-power-mode.ps1 index dd360f50..488eb3dc 100644 --- a/scripts/bhs-power-mode.ps1 +++ b/scripts/bhs-power-mode.ps1 @@ -11,20 +11,23 @@ Write-Output "" $output = pcm-tpmi 2 0x10 -d -b 26:26 # Parse the output to build lists of I/O and compute dies +# Store as "instance:entry" to handle multiple instances per socket $io_dies = @() $compute_dies = @() $die_types = @{} $output -split "`n" | ForEach-Object { $line = $_ - if ($line -match "instance 0") { - $die = $line -match 'entry (\d+)' | Out-Null; $matches[1] + if ($line -match "entry" -and $line -match "instance") { + $entry = $line -match 'entry (\d+)' | Out-Null; $matches[1] + $instance = $line -match 'instance (\d+)' | Out-Null; $matches[1] + $die_key = "${instance}:${entry}" if ($line -match "value 1") { - $die_types[$die] = "IO" - $io_dies += $die + $die_types[$die_key] = "IO" + $io_dies += $die_key } elseif ($line -match "value 0") { - $die_types[$die] = "Compute" - $compute_dies += $die + $die_types[$die_key] = "Compute" + $compute_dies += $die_key } } } @@ -32,46 +35,54 @@ $output -split "`n" | ForEach-Object { if ($args[0] -eq "--optimized-power-mode") { Write-Output "Setting optimized power mode..." 
- foreach ($die in $io_dies) { + foreach ($die_key in $io_dies) { + $instance = $die_key -split ":" | Select-Object -First 1 + $entry = $die_key -split ":" | Select-Object -Last 1 # EFFICIENCY_LATENCY_CTRL_RATIO (Uncore IO) - pcm-tpmi 2 0x18 -d -e $die -b 28:22 -w 8 + pcm-tpmi 2 0x18 -d -i $instance -e $entry -b 28:22 -w 8 # EFFICIENCY_LATENCY_CTRL_LOW_THRESHOLD (Uncore IO) - pcm-tpmi 2 0x18 -d -e $die -b 38:32 -w 13 + pcm-tpmi 2 0x18 -d -i $instance -e $entry -b 38:32 -w 13 # EFFICIENCY_LATENCY_CTRL_HIGH_THRESHOLD (Uncore IO) - pcm-tpmi 2 0x18 -d -e $die -b 46:40 -w 120 + pcm-tpmi 2 0x18 -d -i $instance -e $entry -b 46:40 -w 120 # EFFICIENCY_LATENCY_CTRL_HIGH_THRESHOLD_ENABLE (Uncore IO) - pcm-tpmi 2 0x18 -d -e $die -b 39:39 -w 1 + pcm-tpmi 2 0x18 -d -i $instance -e $entry -b 39:39 -w 1 } - foreach ($die in $compute_dies) { + foreach ($die_key in $compute_dies) { + $instance = $die_key -split ":" | Select-Object -First 1 + $entry = $die_key -split ":" | Select-Object -Last 1 # EFFICIENCY_LATENCY_CTRL_RATIO (Uncore Compute) - pcm-tpmi 2 0x18 -d -e $die -b 28:22 -w 12 + pcm-tpmi 2 0x18 -d -i $instance -e $entry -b 28:22 -w 12 } } if ($args[0] -eq "--latency-optimized-mode") { Write-Output "Setting latency optimized mode..." 
- foreach ($die in $io_dies) { + foreach ($die_key in $io_dies) { + $instance = $die_key -split ":" | Select-Object -First 1 + $entry = $die_key -split ":" | Select-Object -Last 1 # EFFICIENCY_LATENCY_CTRL_RATIO (Uncore IO) - pcm-tpmi 2 0x18 -d -e $die -b 28:22 -w 0 + pcm-tpmi 2 0x18 -d -i $instance -e $entry -b 28:22 -w 0 # EFFICIENCY_LATENCY_CTRL_LOW_THRESHOLD (Uncore IO) - pcm-tpmi 2 0x18 -d -e $die -b 38:32 -w 0 + pcm-tpmi 2 0x18 -d -i $instance -e $entry -b 38:32 -w 0 # EFFICIENCY_LATENCY_CTRL_HIGH_THRESHOLD (Uncore IO) - pcm-tpmi 2 0x18 -d -e $die -b 46:40 -w 0 + pcm-tpmi 2 0x18 -d -i $instance -e $entry -b 46:40 -w 0 # EFFICIENCY_LATENCY_CTRL_HIGH_THRESHOLD_ENABLE (Uncore IO) - pcm-tpmi 2 0x18 -d -e $die -b 39:39 -w 1 + pcm-tpmi 2 0x18 -d -i $instance -e $entry -b 39:39 -w 1 } - foreach ($die in $compute_dies) { + foreach ($die_key in $compute_dies) { + $instance = $die_key -split ":" | Select-Object -First 1 + $entry = $die_key -split ":" | Select-Object -Last 1 # EFFICIENCY_LATENCY_CTRL_RATIO (Uncore Compute) - pcm-tpmi 2 0x18 -d -e $die -b 28:22 -w 0 + pcm-tpmi 2 0x18 -d -i $instance -e $entry -b 28:22 -w 0 } } @@ -83,10 +94,16 @@ function ExtractAndPrintMetrics { param ( [int]$value, [int]$socket_id, - [int]$die + [string]$die_key, + [string]$numa_node, + [int]$instance ) - $die_type = $die_types[$die] + $die_type = $die_types[$die_key] + + # Extract instance and entry from die_key + $inst = $die_key -split ":" | Select-Object -First 1 + $entry = $die_key -split ":" | Select-Object -Last 1 # Extract bits and calculate metrics $min_ratio = ($value -shr 15) -band 0x7F @@ -104,7 +121,12 @@ function ExtractAndPrintMetrics { $eff_latency_ctrl_high_threshold = ($eff_latency_ctrl_high_threshold * 100) / 127 # Print metrics - Write-Output "Socket ID: $socket_id, Die: $die, Type: $die_type" + $output_str = "Socket ID: $socket_id" + if ($numa_node) { + $output_str += ", NUMA node: $numa_node" + } + $output_str += ", instance: $instance, Die: $entry, Type: 
$die_type" + Write-Output $output_str Write-Output "MIN_RATIO: $min_ratio MHz" Write-Output "MAX_RATIO: $max_ratio MHz" Write-Output "EFFICIENCY_LATENCY_CTRL_RATIO: $eff_latency_ctrl_ratio MHz" @@ -117,16 +139,31 @@ function ExtractAndPrintMetrics { } # Iterate over all dies and run pcm-tpmi for each to get the metrics -foreach ($die in $die_types.Keys) { - $output = pcm-tpmi 2 0x18 -d -e $die +foreach ($die_key in $die_types.Keys) { + $instance = $die_key -split ":" | Select-Object -First 1 + $entry = $die_key -split ":" | Select-Object -Last 1 + $output = pcm-tpmi 2 0x18 -d -i $instance -e $entry # Parse the output and extract metrics for each socket $output -split "`n" | ForEach-Object { $line = $_ if ($line -match "Read value") { $value = $line -match 'value (\d+)' | Out-Null; $matches[1] - $socket_id = $line -match 'instance (\d+)' | Out-Null; $matches[1] - ExtractAndPrintMetrics -value $value -socket_id $socket_id -die $die + $inst = $line -match 'instance (\d+)' | Out-Null; $matches[1] + $ent = $line -match 'entry (\d+)' | Out-Null; $matches[1] + $parsed_die_key = "${inst}:${ent}" + # Extract socket ID if present, otherwise fallback to instance ID + if ($line -match '\(socket (\d+)\)') { + $socket_id = $matches[1] + } else { + $socket_id = $inst + } + # Extract NUMA node if present + $numa_node = $null + if ($line -match '\(NUMA node (\d+)\)') { + $numa_node = $matches[1] + } + ExtractAndPrintMetrics -value $value -socket_id $socket_id -die_key $parsed_die_key -numa_node $numa_node -instance $inst } } } \ No newline at end of file diff --git a/scripts/bhs-power-mode.sh b/scripts/bhs-power-mode.sh index 4935f1c3..988c6c40 100644 --- a/scripts/bhs-power-mode.sh +++ b/scripts/bhs-power-mode.sh @@ -13,18 +13,21 @@ echo output=$(pcm-tpmi 2 0x10 -d -b 26:26) # Parse the output to build lists of I/O and compute dies +# Store as "instance:entry" to handle multiple instances per socket io_dies=() compute_dies=() declare -A die_types while read -r line; do - if [[ 
$line == *"instance 0"* ]]; then - die=$(echo "$line" | grep -oP 'entry \K[0-9]+') + if [[ $line == *"entry"* && $line == *"instance"* ]]; then + entry=$(echo "$line" | grep -oP 'entry \K[0-9]+') + instance=$(echo "$line" | grep -oP 'instance \K[0-9]+') + die_key="${instance}:${entry}" if [[ $line == *"value 1"* ]]; then - die_types[$die]="IO" - io_dies+=("$die") + die_types[$die_key]="IO" + io_dies+=("$die_key") elif [[ $line == *"value 0"* ]]; then - die_types[$die]="Compute" - compute_dies+=("$die") + die_types[$die_key]="Compute" + compute_dies+=("$die_key") fi fi done <<< "$output" @@ -32,23 +35,27 @@ done <<< "$output" if [ "$1" == "--optimized-power-mode" ]; then echo "Setting optimized power mode..." - for die in "${io_dies[@]}"; do + for die_key in "${io_dies[@]}"; do + instance="${die_key%:*}" + entry="${die_key#*:}" # EFFICIENCY_LATENCY_CTRL_RATIO (Uncore IO) - pcm-tpmi 2 0x18 -d -e $die -b 28:22 -w 8 + pcm-tpmi 2 0x18 -d -i $instance -e $entry -b 28:22 -w 8 #EFFICIENCY_LATENCY_CTRL_LOW_THRESHOLD (Uncore IO) - pcm-tpmi 2 0x18 -d -e $die -b 38:32 -w 13 + pcm-tpmi 2 0x18 -d -i $instance -e $entry -b 38:32 -w 13 #EFFICIENCY_LATENCY_CTRL_HIGH_THRESHOLD(Uncore IO) - pcm-tpmi 2 0x18 -d -e $die -b 46:40 -w 120 + pcm-tpmi 2 0x18 -d -i $instance -e $entry -b 46:40 -w 120 #EFFICIENCY_LATENCY_CTRL_HIGH_THRESHOLD_ENABLE (Uncore IO) - pcm-tpmi 2 0x18 -d -e $die -b 39:39 -w 1 + pcm-tpmi 2 0x18 -d -i $instance -e $entry -b 39:39 -w 1 done - for die in "${compute_dies[@]}"; do + for die_key in "${compute_dies[@]}"; do + instance="${die_key%:*}" + entry="${die_key#*:}" # EFFICIENCY_LATENCY_CTRL_RATIO (Uncore Compute) - pcm-tpmi 2 0x18 -d -e $die -b 28:22 -w 12 + pcm-tpmi 2 0x18 -d -i $instance -e $entry -b 28:22 -w 12 done fi @@ -56,23 +63,27 @@ fi if [ "$1" == "--latency-optimized-mode" ]; then echo "Setting latency optimized mode..." 
- for die in "${io_dies[@]}"; do + for die_key in "${io_dies[@]}"; do + instance="${die_key%:*}" + entry="${die_key#*:}" # EFFICIENCY_LATENCY_CTRL_RATIO (Uncore IO) - pcm-tpmi 2 0x18 -d -e $die -b 28:22 -w 0 + pcm-tpmi 2 0x18 -d -i $instance -e $entry -b 28:22 -w 0 #EFFICIENCY_LATENCY_CTRL_LOW_THRESHOLD (Uncore IO) - pcm-tpmi 2 0x18 -d -e $die -b 38:32 -w 0 + pcm-tpmi 2 0x18 -d -i $instance -e $entry -b 38:32 -w 0 #EFFICIENCY_LATENCY_CTRL_HIGH_THRESHOLD(Uncore IO) - pcm-tpmi 2 0x18 -d -e $die -b 46:40 -w 0 + pcm-tpmi 2 0x18 -d -i $instance -e $entry -b 46:40 -w 0 #EFFICIENCY_LATENCY_CTRL_HIGH_THRESHOLD_ENABLE (Uncore IO) - pcm-tpmi 2 0x18 -d -e $die -b 39:39 -w 1 + pcm-tpmi 2 0x18 -d -i $instance -e $entry -b 39:39 -w 1 done - for die in "${compute_dies[@]}"; do + for die_key in "${compute_dies[@]}"; do + instance="${die_key%:*}" + entry="${die_key#*:}" # EFFICIENCY_LATENCY_CTRL_RATIO (Uncore Compute) - pcm-tpmi 2 0x18 -d -e $die -b 28:22 -w 0 + pcm-tpmi 2 0x18 -d -i $instance -e $entry -b 28:22 -w 0 done fi @@ -85,8 +96,14 @@ echo "" extract_and_print_metrics() { local value=$1 local socket_id=$2 - local die=$3 - local die_type=${die_types[$die]} + local die_key=$3 + local numa_node=$4 + local instance=$5 + local die_type=${die_types[$die_key]} + + # Extract instance and entry from die_key + local inst="${die_key%:*}" + local entry="${die_key#*:}" # Extract bits and calculate metrics local min_ratio=$(( (value >> 15) & 0x7F )) @@ -104,7 +121,11 @@ extract_and_print_metrics() { eff_latency_ctrl_high_threshold=$(( (eff_latency_ctrl_high_threshold * 100) / 127 )) # Print metrics - echo "Socket ID: $socket_id, Die: $die, Type: $die_type" + echo -n "Socket ID: $socket_id" + if [ -n "$numa_node" ]; then + echo -n ", NUMA node: $numa_node" + fi + echo ", instance: $instance, Die: $entry, Type: $die_type" echo "MIN_RATIO: $min_ratio MHz" echo "MAX_RATIO: $max_ratio MHz" echo "EFFICIENCY_LATENCY_CTRL_RATIO: $eff_latency_ctrl_ratio MHz" @@ -117,15 +138,33 @@ 
extract_and_print_metrics() { } # Iterate over all dies and run pcm-tpmi for each to get the metrics -for die in "${!die_types[@]}"; do - output=$(pcm-tpmi 2 0x18 -d -e "$die") +for die_key in "${!die_types[@]}"; do + instance="${die_key%:*}" + entry="${die_key#*:}" + output=$(pcm-tpmi 2 0x18 -d -i $instance -e $entry) # Parse the output and extract metrics for each socket while read -r line; do if [[ $line == *"Read value"* ]]; then value=$(echo "$line" | grep -oP 'value \K[0-9]+') - socket_id=$(echo "$line" | grep -oP 'instance \K[0-9]+') - extract_and_print_metrics "$value" "$socket_id" "$die" + # Extract instance ID + inst=$(echo "$line" | grep -oP 'instance \K[0-9]+') + # Extract entry ID + ent=$(echo "$line" | grep -oP 'entry \K[0-9]+') + # Create die_key from instance and entry + parsed_die_key="${inst}:${ent}" + # Extract socket ID if present, otherwise fallback to instance ID + if [[ $line =~ \(socket\ ([0-9]+)\) ]]; then + socket_id=${BASH_REMATCH[1]} + else + socket_id=$inst + fi + # Extract NUMA node ID if present in the output (format: "(NUMA node X)") + numa_node="" + if [[ $line =~ \(NUMA\ node\ ([0-9]+)\) ]]; then + numa_node=${BASH_REMATCH[1]} + fi + extract_and_print_metrics "$value" "$socket_id" "$parsed_die_key" "$numa_node" "$inst" fi done <<< "$output" done diff --git a/scripts/bhs-die-stat.sh b/scripts/ufs-die-stat.sh similarity index 62% rename from scripts/bhs-die-stat.sh rename to scripts/ufs-die-stat.sh index ecff3fc7..4d196122 100644 --- a/scripts/bhs-die-stat.sh +++ b/scripts/ufs-die-stat.sh @@ -2,7 +2,7 @@ echo "Intel(r) Performance Counter Monitor" -echo "Birch Stream Die Statistics Utility" +echo "Uncore Frequency Scaling: Die Statistics Utility" echo # Run the pcm-tpmi command and store the output @@ -15,7 +15,21 @@ echo "$output" | while read -r line; do # Extract the value using BASH_REMATCH value=${BASH_REMATCH[1]} die=${BASH_REMATCH[2]} - socket=${BASH_REMATCH[3]} + instance=${BASH_REMATCH[3]} + + # Extract socket ID if present 
in the output (format: "(socket X)") + if [[ $line =~ \(socket\ ([0-9]+)\) ]]; then + socket=${BASH_REMATCH[1]} + else + # Fallback to instance ID if socket info is not available + socket=$instance + fi + + # Extract NUMA node ID if present in the output (format: "(NUMA node X)") + numa_node="" + if [[ $line =~ \(NUMA\ node\ ([0-9]+)\) ]]; then + numa_node=${BASH_REMATCH[1]} + fi freq=$(( (value & 0x7F) * 100 )) compute=$(( (value >> 23) & 1 )) @@ -37,7 +51,11 @@ echo "$output" | while read -r line; do die_type="${die_type}IO" fi die_type="${die_type%"${die_type##*[!\/]}"}" - str="Socket $socket die $die ($die_type) uncore frequency" + str="Socket $socket" + if [ -n "$numa_node" ]; then + str="$str NUMA node $numa_node" + fi + str="$str instance $instance die $die ($die_type) uncore frequency" printf "%-60s: %d MHz\n" "$str" "$freq" fi done diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c19d30db..95447f1f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,8 +1,6 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2022-2025, Intel Corporation -include(FindOpenSSL) - # All pcm-* executables set(PROJECT_NAMES pcm pcm-numa pcm-latency pcm-power pcm-msr pcm-memory pcm-tsx pcm-pcie pcm-core pcm-iio pcm-pcicfg pcm-mmio pcm-tpmi pcm-raw pcm-accel pcm-sensor-server) @@ -220,6 +218,10 @@ if(PCM_BUILD_EXECUTABLES) message(STATUS "OpenSSL version ${OPENSSL_VERSION} >= ${MINIMUM_OPENSSL_VERSION}, OpenSSL support enabled") target_compile_options(${PROJECT_NAME} PRIVATE "-DUSE_SSL") set(LIBS ${LIBS} OpenSSL::SSL OpenSSL::Crypto) + if(MSVC) + # Static OpenSSL on Windows depends on these system libraries + set(LIBS ${LIBS} crypt32 ws2_32) + endif() else() message(STATUS "OpenSSL support has been disabled, the version is less than ${MINIMUM_OPENSSL_VERSION}") endif() diff --git a/src/client/client.cpp b/src/client/client.cpp index d71cb78d..56610731 100644 --- a/src/client/client.cpp +++ b/src/client/client.cpp @@ -16,6 +16,7 @@ #include #include 
#include +#include #include "../daemon/common.h" #include "client.h" @@ -96,9 +97,16 @@ namespace PCMDaemon { void Client::setupSharedMemory() { int sharedMemoryId; - FILE *fp = fopen (shmIdLocation_.c_str(), "r"); + // SDL330: Use O_NOFOLLOW to reject symlinks + int fd = open(shmIdLocation_.c_str(), O_RDONLY | O_NOFOLLOW); + if (fd < 0 && errno == ELOOP) { + std::cerr << "SDL330 ERROR: Symlink detected at " << shmIdLocation_ << "\n"; + exit(EXIT_FAILURE); + } + FILE *fp = (fd >= 0) ? fdopen(fd, "r") : NULL; if (!fp) { + if (fd >= 0) close(fd); std::cerr << "Failed to open to shared memory key location: " << shmIdLocation_ << "\n"; exit(EXIT_FAILURE); } diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 67c756d4..c1039ab6 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -128,6 +128,7 @@ bool PCM::initWinRing0Lib() #endif PCM * PCM::instance = NULL; +std::atomic PCM::quietMode{false}; /* static int bitCount(uint64 n) @@ -303,13 +304,19 @@ void PCM::readCoreCounterConfig(const bool complainAboutMSR) if (aws_workaround == true && vm == true && linux_arch_perfmon == true && core_gen_counter_num_max > 3) { core_gen_counter_num_max = 3; - std::cerr << "INFO: Reducing the number of programmable counters to 3 to workaround the fixed cycle counter virtualization issue on AWS.\n"; - std::cerr << " You can disable the workaround by setting PCM_NO_AWS_WORKAROUND=1 environment variable\n"; + if (!quietMode) + { + std::cerr << "INFO: Reducing the number of programmable counters to 3 to workaround the fixed cycle counter virtualization issue on AWS.\n"; + std::cerr << " You can disable the workaround by setting PCM_NO_AWS_WORKAROUND=1 environment variable\n"; + } } if (isNMIWatchdogEnabled(true) && keepNMIWatchdogEnabled()) { --core_gen_counter_num_max; - std::cerr << "INFO: Reducing the number of programmable counters to " << core_gen_counter_num_max << " because NMI watchdog is enabled.\n"; + if (!quietMode) + { + std::cerr << "INFO: Reducing the number 
of programmable counters to " << core_gen_counter_num_max << " because NMI watchdog is enabled.\n"; + } } #endif } @@ -433,14 +440,20 @@ bool PCM::detectModel() if (cpuinfo.reg.ecx & (1UL << 31UL)) { vm = true; - std::cerr << "Detected a hypervisor/virtualization technology. Some metrics might not be available due to configuration or availability of virtual hardware features.\n"; + if (!quietMode) + { + std::cerr << "Detected a hypervisor/virtualization technology. Some metrics might not be available due to configuration or availability of virtual hardware features.\n"; + } } readCoreCounterConfig(); pcm_cpuid(7, 0, cpuinfo); - std::cerr << "\n===== Processor information =====\n"; + if (!quietMode) + { + std::cerr << "\n===== Processor information =====\n"; + } #ifdef __linux__ auto checkLinuxCpuinfoFlag = [](const std::string& flag) -> bool @@ -468,7 +481,10 @@ bool PCM::detectModel() return false; }; linux_arch_perfmon = checkLinuxCpuinfoFlag("arch_perfmon"); - std::cerr << "Linux arch_perfmon flag : " << (linux_arch_perfmon ? "yes" : "no") << "\n"; + if (!quietMode) + { + std::cerr << "Linux arch_perfmon flag : " << (linux_arch_perfmon ? "yes" : "no") << "\n"; + } if (vm == true && linux_arch_perfmon == false) { std::cerr << "ERROR: vPMU is not enabled in the hypervisor. Please see details in https://software.intel.com/content/www/us/en/develop/documentation/vtune-help/top/set-up-analysis-target/on-virtual-machine.html \n"; @@ -486,13 +502,16 @@ bool PCM::detectModel() } #endif hybrid = (cpuinfo.reg.edx & (1 << 15)) ? true : false; - std::cerr << "Hybrid processor : " << (hybrid ? "yes" : "no") << "\n"; - std::cerr << "IBRS and IBPB supported : " << ((cpuinfo.reg.edx & (1 << 26)) ? "yes" : "no") << "\n"; - std::cerr << "STIBP supported : " << ((cpuinfo.reg.edx & (1 << 27)) ? "yes" : "no") << "\n"; - std::cerr << "Spec arch caps supported : " << ((cpuinfo.reg.edx & (1 << 29)) ? 
"yes" : "no") << "\n"; - std::cerr << "Max CPUID level : " << max_cpuid << "\n"; - std::cerr << "CPU family : " << cpu_family << "\n"; - std::cerr << "CPU model number : " << cpu_model_private << "\n"; + if (!quietMode) + { + std::cerr << "Hybrid processor : " << (hybrid ? "yes" : "no") << "\n"; + std::cerr << "IBRS and IBPB supported : " << ((cpuinfo.reg.edx & (1 << 26)) ? "yes" : "no") << "\n"; + std::cerr << "STIBP supported : " << ((cpuinfo.reg.edx & (1 << 27)) ? "yes" : "no") << "\n"; + std::cerr << "Spec arch caps supported : " << ((cpuinfo.reg.edx & (1 << 29)) ? "yes" : "no") << "\n"; + std::cerr << "Max CPUID level : " << max_cpuid << "\n"; + std::cerr << "CPU family : " << cpu_family << "\n"; + std::cerr << "CPU model number : " << cpu_model_private << "\n"; + } return true; } @@ -512,7 +531,10 @@ bool PCM::isRDTDisabled() const #endif if (env != nullptr && std::string(env) == std::string("1")) { - std::cout << "Disabling RDT usage because PCM_NO_RDT=1 environment variable is set.\n"; + if (!quietMode) + { + std::cout << "Disabling RDT usage because PCM_NO_RDT=1 environment variable is set.\n"; + } flag = 1; } else @@ -605,27 +627,39 @@ void PCM::initRDT() auto env = std::getenv("PCM_USE_RESCTRL"); if (env != nullptr && std::string(env) == std::string("1")) { - std::cerr << "INFO: using Linux resctrl driver for RDT metrics (L3OCC, LMB, RMB) because environment variable PCM_USE_RESCTRL=1\n"; + if (!quietMode) + { + std::cerr << "INFO: using Linux resctrl driver for RDT metrics (L3OCC, LMB, RMB) because environment variable PCM_USE_RESCTRL=1\n"; + } resctrl.init(); useResctrl = true; return; } if (resctrl.isMounted()) { - std::cerr << "INFO: using Linux resctrl driver for RDT metrics (L3OCC, LMB, RMB) because resctrl driver is mounted.\n"; + if (!quietMode) + { + std::cerr << "INFO: using Linux resctrl driver for RDT metrics (L3OCC, LMB, RMB) because resctrl driver is mounted.\n"; + } resctrl.init(); useResctrl = true; return; } if (isSecureBoot()) { - 
std::cerr << "INFO: using Linux resctrl driver for RDT metrics (L3OCC, LMB, RMB) because Secure Boot mode is enabled.\n"; + if (!quietMode) + { + std::cerr << "INFO: using Linux resctrl driver for RDT metrics (L3OCC, LMB, RMB) because Secure Boot mode is enabled.\n"; + } resctrl.init(); useResctrl = true; return; } #endif - std::cerr << "Initializing RMIDs" << std::endl; + if (!quietMode) + { + std::cerr << "Initializing RMIDs" << std::endl; + } unsigned maxRMID; /* Calculate maximum number of RMID supported by socket */ maxRMID = getMaxRMID(); @@ -1754,7 +1788,10 @@ bool PCM::detectNominalFrequency() } #ifndef PCM_SILENT - std::cerr << "Nominal core frequency: " << nominal_frequency << " Hz\n"; + if (!quietMode) + { + std::cerr << "Nominal core frequency: " << nominal_frequency << " Hz\n"; + } #endif } @@ -1783,9 +1820,12 @@ void PCM::initEnergyMonitoring() pkgMaximumPower = (int32) (double(extract_bits(package_power_info, 32, 46))*wattsPerPowerUnit); #ifndef PCM_SILENT - std::cerr << "Package thermal spec power: " << pkgThermalSpecPower << " Watt; "; - std::cerr << "Package minimum power: " << pkgMinimumPower << " Watt; "; - std::cerr << "Package maximum power: " << pkgMaximumPower << " Watt;\n"; + if (!quietMode) + { + std::cerr << "Package thermal spec power: " << pkgThermalSpecPower << " Watt; "; + std::cerr << "Package minimum power: " << pkgMinimumPower << " Watt; "; + std::cerr << "Package maximum power: " << pkgMaximumPower << " Watt;\n"; + } #endif int i = 0; @@ -2094,23 +2134,50 @@ void PCM::initUncoreObjects() //TPMIHandle::setVerbose(true); try { - if (isServerCPU() && TPMIHandle::getNumInstances() == (size_t)num_sockets) + if (isServerCPU() && TPMIHandle::getNumInstances() > 0) { - DBG(1, "TPMIHandle::getNumInstances(): ", TPMIHandle::getNumInstances()); + const auto nInstances = TPMIHandle::getNumInstances(); + DBG(1, "TPMIHandle::getNumInstances(): ", nInstances); UFSStatus.resize(num_sockets); - for (uint32 s = 0; s < (uint32)num_sockets; ++s) + 
for (uint32 i = 0; i < (uint32)nInstances; ++i) { + uint32 socket = (std::numeric_limits::max)(); // invalid socket by default try { - TPMIHandle h(s, UFS_ID, UFS_FABRIC_CLUSTER_OFFSET * sizeof(uint64)); - DBG(1, "Socket ", s, " dies: ", h.getNumEntries()); + TPMIHandle h(i, UFS_ID, UFS_FABRIC_CLUSTER_OFFSET * sizeof(uint64)); + const auto numaNode = h.getNUMANode(); + DBG(1, "Instance ", i, " NUMA node: ", numaNode); + if (numaNode >= 0) + { + const auto socketTmp = mapNUMANodeToSocket(numaNode); + DBG(1, "Instance ", i, " mapped to socket: ", socketTmp); + if (socketTmp >= 0 && socketTmp < (int32)num_sockets) + { + socket = (uint32)socketTmp; + } + else + { + socket = 0; + std::cerr << "WARNING: Could not map UFS TPMI instance " << i << " NUMA node " << numaNode << " to socket. Assuming socket 0.\n"; + } + } + else + { + socket = 0; + std::cerr << "WARNING: Could not map UFS TPMI instance " << i << " to NUMA node. Assuming socket 0.\n"; + } + DBG(1, "Instance ", i, " Socket ", socket, " dies: ", h.getNumEntries()); for (size_t die = 0; die < h.getNumEntries(); ++die) { const auto clusterOffset = extract_bits(h.read64(die), 0, 7); - UFSStatus[s].push_back(std::make_shared(s, UFS_ID, (clusterOffset + UFS_STATUS)* sizeof(uint64))); + assert(socket < UFSStatus.size()); + UFSStatus[socket].push_back( + UFSStatusEntry( + std::make_shared(i, UFS_ID, (clusterOffset + UFS_STATUS)* sizeof(uint64)), + die)); } } catch (std::exception & e) { - std::cerr << "ERROR: Could not open UFS TPMI register on socket " << s << ". Uncore frequency metrics will be unavailable. Exception details: " << e.what() << "\n"; + std::cerr << "ERROR: Could not open UFS TPMI register on socket " << socket << " instance " << i << ". Uncore frequency metrics will be unavailable. Exception details: " << e.what() << "\n"; } } } @@ -2119,7 +2186,7 @@ void PCM::initUncoreObjects() std::cerr << "ERROR: Could not initialize TPMI. Uncore frequency metrics will be unavailable. 
Exception details: " << e.what() << "\n"; } - for (uint32 s = 0; s < (uint32)num_sockets; ++s) + for (uint32 s = 0; s < (uint32)num_sockets && !quietMode; ++s) { std::cerr << "Socket " << s << ":" << " " << getMaxNumOfUncorePMUs(PCU_PMU_ID, s) << " PCU units detected." @@ -2700,7 +2767,7 @@ void PCM::initUncorePMUsDirect() initSocket2Bus(socket2DSAbus, SPR_IDX_DSA_REGISTER_DEV_ADDR, SPR_IDX_DSA_REGISTER_FUNC_ADDR, DSA_DEV_IDS, (uint32)sizeof(DSA_DEV_IDS) / sizeof(DSA_DEV_IDS[0])); initSocket2Bus(socket2QATbus, SPR_IDX_QAT_REGISTER_DEV_ADDR, SPR_IDX_QAT_REGISTER_FUNC_ADDR, QAT_DEV_IDS, (uint32)sizeof(QAT_DEV_IDS) / sizeof(QAT_DEV_IDS[0])); #ifndef PCM_SILENT - std::cerr << "Info: IDX - Detected " << socket2IAAbus.size() << " IAA devices, " << socket2DSAbus.size() << " DSA devices, " << socket2QATbus.size() << " QAT devices. \n"; + if (!quietMode) std::cerr << "INFO: IDX - Detected " << socket2IAAbus.size() << " IAA devices, " << socket2DSAbus.size() << " DSA devices, " << socket2QATbus.size() << " QAT devices. \n"; #endif initRootBusMap(rootbusMap); @@ -2754,7 +2821,7 @@ void PCM::initUncorePMUsDirect() std::hex << devInfo.func << "/telemetry/control"; qatTLMCTLStr = readSysFS(qat_TLMCTL_sysfs_path.str().c_str(), true); if(!qatTLMCTLStr.size()){ - std::cerr << "Warning: IDX - QAT telemetry feature of B:0x" << std::hex << devInfo.bus << ",D:0x" << devInfo.dev << ",F:0x" << devInfo.func \ + if (!quietMode) std::cerr << "INFO: IDX - QAT telemetry feature of B:0x" << std::hex << devInfo.bus << ",D:0x" << devInfo.dev << ",F:0x" << devInfo.func \ << " is NOT available, skipped." 
<< std::dec << std::endl; continue; } @@ -3201,6 +3268,12 @@ PCM::PCM() : run_state(1), needToRestoreNMIWatchdog(false) { + // Check for PCM_QUIET environment variable + if (safe_getenv("PCM_QUIET") == std::string("1")) + { + quietMode = true; + } + #ifdef __linux__ increaseULimit(); #endif @@ -3229,29 +3302,42 @@ PCM::PCM() : readCoreCounterConfig(true); #ifndef PCM_SILENT - printSystemTopology(); + if (!quietMode) + { + printSystemTopology(); + } #endif if(!detectNominalFrequency()) return; - showSpecControlMSRs(); + if (!quietMode) + { + showSpecControlMSRs(); + } #ifndef PCM_DEBUG_TOPOLOGY if (safe_getenv("PCM_PRINT_TOPOLOGY") == "1") #endif { - printDetailedSystemTopology(1); + if (!quietMode) + { + printDetailedSystemTopology(1); + } } initEnergyMonitoring(); #ifndef PCM_SILENT - std::cerr << "\n"; + if (!quietMode) + { + std::cerr << "\n"; + } #endif if (isServerCPU()) { - uncorePMUDiscovery = std::make_shared(); + assert(topology.size()); + uncorePMUDiscovery = std::make_shared(*this); } initUncoreObjects(); @@ -6616,12 +6702,13 @@ void PCM::readAndAggregateUncoreMCCounters(const uint32 socket, CounterStateType if (socket < UFSStatus.size()) { result.UFSStatus.clear(); - for (size_t die = 0; die < UFSStatus[socket].size(); ++die) + for (auto & e : UFSStatus[socket]) { - auto & handle = UFSStatus[socket][die]; - if (handle.get() && die < handle->getNumEntries()) + auto & handle = e.tpmiHandle; + const auto pos = e.pos; + if (handle.get() && pos < handle->getNumEntries()) { - const auto value = handle->read64(die); + const auto value = handle->read64(pos); DBG(3, std::hex , value , std::dec); result.UFSStatus.push_back(value); } @@ -7235,6 +7322,220 @@ uint32 PCM::getNumSockets() const return (uint32)num_sockets; } +int32 PCM::mapNUMANodeToSocket(uint32 numa_node_id) const +{ + // Check cache (thread-safe read) + { + pcm::Mutex::Scope lock(numaNodeToSocketCacheMutex); + auto it = numaNodeToSocketCache.find(numa_node_id); + if (it != 
numaNodeToSocketCache.end()) + { + return it->second; + } + } + + // Cache miss, compute the result + int32 socket_id = -1; + + // Helper lambda to cache the result before returning + auto cacheAndReturn = [&](int32 result) -> int32 { + pcm::Mutex::Scope lock(numaNodeToSocketCacheMutex); + numaNodeToSocketCache[numa_node_id] = result; + return result; + }; + +#ifdef __linux__ + // On Linux, read the CPU list for this NUMA node and map to socket + std::ostringstream path; + path << "/sys/devices/system/node/node" << numa_node_id << "/cpulist"; + + std::ifstream cpulist_file(path.str()); + if (!cpulist_file.is_open()) + { + // Try alternative path with /pcm prefix (for containerized environments) + cpulist_file.open("/pcm" + path.str()); + if (!cpulist_file.is_open()) + { + DBG(2, "Cannot open NUMA node cpulist file: ", path.str()); + return cacheAndReturn(-1); + } + } + + std::string cpulist; + std::getline(cpulist_file, cpulist); + + if (cpulist.empty()) + { + DBG(2, "Empty CPU list for NUMA node ", numa_node_id); + return cacheAndReturn(-1); + } + + // Parse the first CPU from the list (format: "0-15,32-47" or "0" or "0,2,4") + size_t first_cpu_end = cpulist.find_first_of(",-"); + std::string first_cpu_str = (first_cpu_end == std::string::npos) ? 
cpulist : cpulist.substr(0, first_cpu_end); + + try + { + uint32 first_cpu = std::stoul(first_cpu_str); + if (first_cpu < topology.size()) + { + socket_id = topology[first_cpu].socket_id; + DBG(3, "NUMA node ", numa_node_id, " maps to socket ", socket_id); + return cacheAndReturn(socket_id); + } + } + catch (const std::exception& e) + { + DBG(2, "Failed to parse CPU ID from cpulist: ", cpulist, " error: ", e.what()); + return cacheAndReturn(-1); + } + + return cacheAndReturn(-1); +#elif defined(_MSC_VER) + // On Windows, use GetLogicalProcessorInformationEx to map NUMA node to processors + // and then map processor to socket using topology information + DWORD length = 0; + GetLogicalProcessorInformationEx(RelationNumaNode, nullptr, &length); + if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) + { + return cacheAndReturn(-1); + } + + std::vector buffer(length); + auto info = reinterpret_cast(buffer.data()); + + if (!GetLogicalProcessorInformationEx(RelationNumaNode, info, &length)) + { + return cacheAndReturn(-1); + } + + // Iterate through NUMA nodes + DWORD offset = 0; + while (offset < length) + { + auto current = reinterpret_cast( + reinterpret_cast(info) + offset); + + if (current->Relationship == RelationNumaNode && + current->NumaNode.NodeNumber == numa_node_id) + { + // The GROUP_AFFINITY structure contains a processor group number and affinity mask + WORD groupNumber = current->NumaNode.GroupMask.Group; + KAFFINITY mask = current->NumaNode.GroupMask.Mask; + + if (mask != 0) + { + auto BitScanForward64 = [](unsigned long* Index, uint64_t Mask) + { + if (Mask == 0) return 0; + + // Magic numbers for LSB (chess engine style) + static const uint64_t magic = 0x03f79d71b4cb0a89ULL; + uint64_t isolated = Mask & -Mask; // Rightmost set bit + *Index = (unsigned long)(((isolated * magic) >> 58)); + }; + + // Find first set bit (first processor in this NUMA node within this group) + DWORD bitPosition = 0; + BitScanForward64(&bitPosition, mask); + + // On Windows, we 
need to find the logical processor ID that corresponds to + // this bit position in this group. We iterate through topology to find a match. + // Note: This is a simplified approach. A more robust implementation would need + // to query the system for the mapping between group/bit position and logical processor IDs. + for (size_t cpu = 0; cpu < topology.size(); ++cpu) + { + // Check if this CPU belongs to the current NUMA node + // Since we don't have direct group information in topology, we use a heuristic: + // try the first CPU we find in the topology array + // A better approach would store group information in TopologyEntry + if (cpu == bitPosition + (groupNumber * 64)) // Rough approximation + { + socket_id = topology[cpu].socket_id; + return cacheAndReturn(socket_id); + } + } + + // Fallback: just return the socket_id of the first available CPU if within bounds + if (bitPosition < topology.size()) + { + socket_id = topology[bitPosition].socket_id; + return cacheAndReturn(socket_id); + } + } + } + + offset += current->Size; + } + + return cacheAndReturn(-1); +#elif defined(__FreeBSD__) || defined(__DragonFly__) + // FreeBSD implementation using vm.ndomains and cpuset APIs + + // First check if NUMA is enabled on this system + int ndomains = 0; + size_t len = sizeof(ndomains); + + if (sysctlbyname("vm.ndomains", &ndomains, &len, nullptr, 0) != 0) + { + DBG(2, "Cannot query vm.ndomains, NUMA not available"); + return cacheAndReturn(-1); + } + + if (ndomains <= 1) + { + // NUMA not enabled or single domain system + DBG(3, "NUMA not enabled on FreeBSD (vm.ndomains = ", ndomains, ")"); + return cacheAndReturn(-1); + } + + // Validate NUMA node ID + if (numa_node_id >= (uint32)ndomains) + { + DBG(2, "Invalid NUMA node ID ", numa_node_id, " (max: ", ndomains - 1, ")"); + return cacheAndReturn(-1); + } + +#if defined(__FreeBSD__) && defined(CPU_WHICH_DOMAIN) + // On FreeBSD with CPU_WHICH_DOMAIN support (FreeBSD 12.0+) + // Query which CPUs belong to this NUMA domain 
+ cpuset_t cpuset; + CPU_ZERO(&cpuset); + + // cpuset_getaffinity with CPU_WHICH_DOMAIN returns the cpuset for a specific NUMA domain + if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_DOMAIN, numa_node_id, + sizeof(cpuset), &cpuset) == 0) + { + // Find the first CPU in this domain's cpuset + for (size_t cpu = 0; cpu < topology.size(); ++cpu) + { + if (CPU_ISSET(cpu, &cpuset)) + { + socket_id = topology[cpu].socket_id; + DBG(3, "NUMA domain ", numa_node_id, " maps to socket ", socket_id); + return cacheAndReturn(socket_id); + } + } + } + else + { + DBG(2, "cpuset_getaffinity failed for domain ", numa_node_id); + } +#endif + + return cacheAndReturn(-1); +#elif defined(__APPLE__) + // On macOS, NUMA information is not readily available + // For now, return -1 to indicate the mapping is not available + (void)numa_node_id; // Suppress unused parameter warning + return cacheAndReturn(-1); +#else + // Unsupported platform + (void)numa_node_id; // Suppress unused parameter warning + return cacheAndReturn(-1); +#endif +} + uint32 PCM::getAccel() const { return accel; @@ -7615,7 +7916,7 @@ bool PCM::useLinuxPerfForUncore() const bool secureBoot = isSecureBoot(); #ifdef PCM_USE_PERF const auto imcIDs = enumeratePerfPMUs("imc", 100); - std::cerr << "INFO: Linux perf interface to program uncore PMUs is " << (imcIDs.empty()?"NOT ":"") << "present\n"; + if (!quietMode) std::cerr << "INFO: Linux perf interface to program uncore PMUs is " << (imcIDs.empty()?"NOT ":"") << "present\n"; if (imcIDs.empty()) { use = 0; @@ -7624,12 +7925,12 @@ bool PCM::useLinuxPerfForUncore() const const char * perf_env = std::getenv("PCM_USE_UNCORE_PERF"); if (perf_env != NULL && std::string(perf_env) == std::string("1")) { - std::cerr << "INFO: using Linux perf interface to program uncore PMUs because env variable PCM_USE_UNCORE_PERF=1\n"; + if (!quietMode) std::cerr << "INFO: using Linux perf interface to program uncore PMUs because env variable PCM_USE_UNCORE_PERF=1\n"; use = 1; } if (secureBoot) 
{ - std::cerr << "INFO: Secure Boot detected. Using Linux perf for uncore PMU programming.\n"; + if (!quietMode) std::cerr << "INFO: Secure Boot detected. Using Linux perf for uncore PMU programming.\n"; use = 1; } #else @@ -7724,7 +8025,7 @@ ServerUncorePMUs::ServerUncorePMUs(uint32 socket_, const PCM * pcm) : initDirect(socket_, pcm); } - std::cerr << "Socket " << socket_ << ": " << + if (!PCM::getQuietMode()) std::cerr << "Socket " << socket_ << ": " << getNumMC() << " memory controllers detected with total number of " << getNumMCChannels() << " channels. " << getNumQPIPorts() << " " << pcm->xPI() << " ports detected." << " " << m2mPMUs.size() << " M2M (mesh to memory)/B2CMI blocks detected." diff --git a/src/cpucounters.h b/src/cpucounters.h index 0ca20ddb..4091ec75 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -43,6 +43,8 @@ #include #include #include +#include +#include "mutex.h" #ifdef PCM_USE_PERF #include @@ -651,13 +653,23 @@ class PCM_API PCM UFS_FABRIC_CLUSTER_OFFSET = 1, UFS_STATUS = 0 }; - std::vector > > UFSStatus; + struct UFSStatusEntry + { + std::shared_ptr tpmiHandle; + size_t pos; + UFSStatusEntry() : tpmiHandle(nullptr), pos(0) {} + UFSStatusEntry(const std::shared_ptr& handle, size_t position) : tpmiHandle(handle), pos(position) {} + }; + std::vector > UFSStatus; std::vector topology; + mutable std::unordered_map numaNodeToSocketCache; // Cache for mapNUMANodeToSocket + mutable pcm::Mutex numaNodeToSocketCacheMutex; // Mutex to protect cache access SystemRoot* systemTopology; std::string errorMessage; static PCM * instance; + static std::atomic quietMode; bool programmed_core_pmu{false}; std::vector > MSR; std::vector > serverUncorePMUs; @@ -1312,6 +1324,23 @@ class PCM_API PCM public: static bool isInitialized() { return instance != nullptr; } + /*! + \brief Set quiet mode for PCM initialization + + When quiet mode is enabled, only errors are output during PCM initialization. 
+ This method should be called before getInstance() is called for the first time. + + \param enable true to enable quiet mode, false to disable + */ + static void setQuietMode(bool enable) { quietMode = enable; } + + /*! + \brief Check if quiet mode is enabled + + \return true if quiet mode is enabled, false otherwise + */ + static bool getQuietMode() { return quietMode; } + //! check if TMA level 1 metrics are supported bool isHWTMAL1Supported() const; @@ -2045,6 +2074,14 @@ class PCM_API PCM //! \return socket identifier int32 getSocketId(uint32 core_id) const { return (int32)topology[core_id].socket_id; } + //! \brief Maps NUMA node ID to CPU socket ID + //! \param numa_node_id NUMA node identifier + //! \return socket identifier, or -1 if mapping is not available or numa_node_id is invalid + //! \note On Linux: Uses /sys/devices/system/node/nodeX/cpulist + //! \note On Windows: Uses GetLogicalProcessorInformationEx (may have limitations with multi-group processors) + //! \note On FreeBSD: Uses vm.ndomains and cpuset_getdomain (FreeBSD 12.0+) + //! \note On macOS: Not implemented, returns -1 + int32 mapNUMANodeToSocket(uint32 numa_node_id) const; size_t getNumCXLPorts(uint32 socket) const { diff --git a/src/mmio.cpp b/src/mmio.cpp index bceba05d..ee112d9f 100644 --- a/src/mmio.cpp +++ b/src/mmio.cpp @@ -239,10 +239,14 @@ MMIORange::MMIORange(const uint64 baseAddr_, const uint64 size_, const bool read silent(silent_), core(core_) { - const int oflag = readonly ? O_RDONLY : O_RDWR; + // SDL330: Use O_NOFOLLOW to reject symlinks + const int oflag = (readonly ? 
O_RDONLY : O_RDWR) | O_NOFOLLOW; int handle = ::open("/dev/mem", oflag); if (handle < 0) { + if (errno == ELOOP) { + std::cerr << "SDL330 CRITICAL: Symlink detected at /dev/mem\n"; + } std::ostringstream strstr; strstr << "opening /dev/mem failed: errno is " << errno << " (" << strerror(errno) << ")\n"; if (!silent) diff --git a/src/pci.cpp b/src/pci.cpp index 86c02efc..6a008640 100644 --- a/src/pci.cpp +++ b/src/pci.cpp @@ -6,12 +6,20 @@ // Jim Harris (FreeBSD) #include +#include +#include +#include #include #include #include #include #include +#include +#include +#include +#include #include "pci.h" +#include "cpucounters.h" #ifndef _MSC_VER #include @@ -31,22 +39,64 @@ #if defined (__FreeBSD__) || defined(__DragonFly__) #include +#include #endif namespace pcm { #ifdef _MSC_VER +void readSRATTable(std::unordered_map& pciToNuma); + extern HMODULE hOpenLibSys; static char * nonZeroGroupErrMsg = "Non-zero PCI group segments are not supported in Winring0 driver, make sure MSR.sys driver can be used."; +// Helper function to compute NUMA node for Windows +static int32 getNUMANodeWindows(uint32 groupnr, uint32 actual_bus, uint32 device, uint32 function) +{ + // Windows implementation: read SRAT ACPI table to map PCI devices to NUMA nodes + static std::unordered_map pciToNuma; + static std::mutex initMutex; + static bool initialized = false; + + // Thread-safe initialization using double-checked locking + // cppcheck-suppress identicalInnerCondition + if (!initialized) + { + std::lock_guard lock(initMutex); + // cppcheck-suppress identicalInnerCondition + if (!initialized) + { + readSRATTable(pciToNuma); + initialized = true; + } + } + + // Construct key matching SRAT format: segment(16) | bus(8) | device(5) | function(3) + uint64_t key = ((uint64_t)groupnr << 16) | ((uint64_t)actual_bus << 8) | + ((uint64_t)device << 3) | function; + + auto it = pciToNuma.find(key); + if (it != pciToNuma.end()) + { + DBG(3, "Found NUMA node ", it->second, " for PCI device ", std::hex, + 
groupnr, ":", actual_bus, ":", device, ".", function, std::dec); + return (int32)it->second; + } + + DBG(2, "No NUMA affinity found in SRAT for PCI device ", std::hex, + groupnr, ":", actual_bus, ":", device, ".", function, std::dec); + return -1; +} + PciHandle::PciHandle(uint32 groupnr_, uint32 bus_, uint32 device_, uint32 function_) : hDriver(openMSRDriver()), bus((groupnr_ << 8) | bus_), device(device_), function(function_), - pciAddress(PciBusDevFunc(bus_, device_, function_)) + pciAddress(PciBusDevFunc(bus_, device_, function_)), + numaNode(-1) { DBG(3, "Creating PCI Config space handle at g:b:d:f ", groupnr_, ":", bus_, ":", device_, ":", function_); if (groupnr_ != 0 && hDriver == INVALID_HANDLE_VALUE) @@ -59,6 +109,16 @@ PciHandle::PciHandle(uint32 groupnr_, uint32 bus_, uint32 device_, uint32 functi { throw std::runtime_error("MSR and Winring0 drivers can't be opened"); } + + // Initialize NUMA node during construction + const uint32 groupnr = (bus >> 8); + const uint32 actual_bus = bus & 0xFF; + numaNode = getNUMANodeWindows(groupnr, actual_bus, device, function); +} + +int32 PciHandle::getNUMANode() const +{ + return numaNode; } bool PciHandle::exists(uint32 groupnr_, uint32 bus_, uint32 device_, uint32 function_) @@ -195,15 +255,261 @@ PciHandle::~PciHandle() if (hDriver != INVALID_HANDLE_VALUE) CloseHandle(hDriver); } +// Windows implementation to read MCFG table from ACPI firmware +int PciHandle::openMcfgTable() { + // On Windows, ACPI tables are accessed via GetSystemFirmwareTable API + // rather than through file system. This function returns -1 to indicate + // the file-based approach is not available on Windows. + // See PciHandle::readMCFGRecords() for the Windows implementation. 
+ return -1; +} + +// Windows implementation to read MCFG ACPI table using Firmware Table API or physical memory +void PciHandle::readMCFGRecords(std::vector& mcfg) +{ + mcfg.clear(); + + // Signature for ACPI firmware tables + const DWORD acpiSignature = 'ACPI'; + // MCFG table signature (note: stored in reverse byte order in ACPI tables) + const DWORD mcfgSignature = 'GFCM'; // 'MCFG' in reverse + + // Try to get the MCFG table size first + UINT tableSize = GetSystemFirmwareTable(acpiSignature, mcfgSignature, nullptr, 0); + + if (tableSize == 0) + { + DWORD error = GetLastError(); + DBG(1, "GetSystemFirmwareTable failed to get MCFG table size. Error: ", error); + + // Fallback: use default segments for known platforms + MCFGRecord segment; + segment.startBusNumber = 0; + segment.endBusNumber = 0xff; + segment.baseAddress = 0; // Actual base address is platform-specific and not available without MCFG table + + auto maxSegments = 1; + switch (PCM::getCPUFamilyModelFromCPUID()) + { + case PCM::SPR: + case PCM::GNR: + maxSegments = 4; + break; + } + + for (segment.PCISegmentGroupNumber = 0; segment.PCISegmentGroupNumber < maxSegments; ++(segment.PCISegmentGroupNumber)) + { + mcfg.push_back(segment); + } + + std::cerr << "PCM Warning: Could not read MCFG table from firmware, using default segments\n"; + return; + } + + // Allocate buffer for the MCFG table + std::vector tableBuffer(tableSize); + + // Read the actual table + UINT bytesRead = GetSystemFirmwareTable(acpiSignature, mcfgSignature, tableBuffer.data(), tableSize); + + if (bytesRead == 0 || bytesRead != tableSize) + { + std::cerr << "PCM Error: Failed to read MCFG table from firmware\n"; + return; + } + + // Parse the MCFG table + // The table format is: ACPI header (variable) + MCFG records + if (tableSize < sizeof(MCFGHeader)) + { + std::cerr << "PCM Error: MCFG table too small\n"; + return; + } + + // Use memcpy to avoid potential alignment issues + MCFGHeader header; + std::memcpy(&header, 
tableBuffer.data(), sizeof(MCFGHeader)); + + DBG(1, "MCFG table signature: \"", + header.signature[0], header.signature[1], + header.signature[2], header.signature[3], + "\" MCFG table length: ", header.length, + " Number of MCFG records: ", header.nrecords()); + + // Verify signature + if (std::strncmp(header.signature, "MCFG", 4) != 0) + { + std::cerr << "PCM Error: Invalid MCFG table signature\n"; + return; + } + + // Validate header length to prevent integer underflow in nrecords() + if (header.length < sizeof(MCFGHeader)) + { + std::cerr << "PCM Error: Invalid MCFG table length (too small)\n"; + return; + } + + // Validate that the reported length matches the actual table size + if (header.length > tableSize) + { + std::cerr << "PCM Error: MCFG table length mismatch\n"; + return; + } + + // Read MCFG records + const unsigned segments = header.nrecords(); + const BYTE* recordPtr = tableBuffer.data() + sizeof(MCFGHeader); + + for (unsigned int i = 0; i < segments; ++i) + { + if (recordPtr + sizeof(MCFGRecord) > tableBuffer.data() + tableSize) + { + std::cerr << "PCM Error: MCFG record out of bounds\n"; + break; + } + + MCFGRecord record; + std::memcpy(&record, recordPtr, sizeof(MCFGRecord)); + + DBG(1, "MCFG segment " , i , ": ", + "BaseAddress=0x" , std::hex , record.baseAddress, + " PCISegmentGroupNumber=0x" , record.PCISegmentGroupNumber, + " startBusNumber=0x" , (unsigned)record.startBusNumber, + " endBusNumber=0x" , (unsigned)record.endBusNumber, + std::dec); + + mcfg.push_back(record); + recordPtr += sizeof(MCFGRecord); + } +} + +// Windows implementation to read SRAT ACPI table and build PCI device to NUMA node mapping +static void readSRATTable(std::unordered_map& pciToNuma) +{ + pciToNuma.clear(); + + const DWORD acpiSignature = 'ACPI'; + // SRAT table signature (note: stored in reverse byte order in ACPI tables) + const DWORD sratSignature = 'TARS'; // 'SRAT' in reverse + + // Try to get the SRAT table size first + UINT tableSize = 
GetSystemFirmwareTable(acpiSignature, sratSignature, nullptr, 0); + + if (tableSize == 0) + { + DBG(1, "SRAT table not available, NUMA node information will not be available"); + return; + } + + // Allocate buffer for the SRAT table + std::vector tableBuffer(tableSize); + + // Read the actual table + UINT bytesRead = GetSystemFirmwareTable(acpiSignature, sratSignature, tableBuffer.data(), tableSize); + + if (bytesRead == 0 || bytesRead != tableSize) + { + DBG(1, "Failed to read SRAT table from firmware"); + return; + } + + // SRAT table structure: + // - ACPI header (36 bytes): Signature(4) + Length(4) + Revision(1) + Checksum(1) + OEMID(6) + + // OEM Table ID(8) + OEM Revision(4) + Creator ID(4) + Creator Revision(4) + // - Reserved(4) + Reserved(8) + // - Followed by variable-length subtable structures + + if (tableSize < 36) + { + DBG(1, "SRAT table too small"); + return; + } + + // Verify signature + if (std::memcmp(tableBuffer.data(), "SRAT", 4) != 0) + { + DBG(1, "Invalid SRAT table signature"); + return; + } + + // Get table length from header using memcpy to avoid alignment issues + uint32_t tableLength; + std::memcpy(&tableLength, tableBuffer.data() + 4, sizeof(uint32_t)); + + DBG(2, "SRAT table found, length: ", tableLength); + + // Skip ACPI header (36 bytes) + Reserved fields (12 bytes) = 48 bytes + const BYTE* ptr = tableBuffer.data() + 48; + const BYTE* endPtr = tableBuffer.data() + (std::min)((uint32_t)tableSize, tableLength); + + while (ptr + 2 <= endPtr) + { + uint8_t type = ptr[0]; + uint8_t length = ptr[1]; + + if (ptr + length > endPtr) + { + DBG(2, "SRAT subtable extends beyond table boundary, stopping parse"); + break; + } + + if (type == 2) // PCI Device Affinity Structure + { + // Structure format (variable, at least 16 bytes): + // Type(1) + Length(1) + Reserved(2) + + // Proximity Domain(4) + PCI Segment(2) + PCI Bus(1) + + // Device/Function(1) + Flags(4) + Reserved(4) + + if (length < 16) + { + DBG(2, "SRAT PCI Device Affinity structure 
too small: ", (int)length); + ptr += length; + continue; + } + + // Use memcpy to avoid alignment issues + uint32_t proximityDomain; + uint16_t pciSegment; + std::memcpy(&proximityDomain, ptr + 4, sizeof(uint32_t)); + std::memcpy(&pciSegment, ptr + 8, sizeof(uint16_t)); + uint8_t pciBus = ptr[10]; + uint8_t deviceFunction = ptr[11]; + uint8_t pciDevice = (deviceFunction >> 3) & 0x1F; + uint8_t pciFunction = deviceFunction & 0x07; + + // Construct unique key: segment(16) | bus(8) | device(5) | function(3) + uint64_t key = ((uint64_t)pciSegment << 16) | ((uint64_t)pciBus << 8) | + ((uint64_t)pciDevice << 3) | pciFunction; + + pciToNuma[key] = proximityDomain; + + DBG(2, "SRAT: PCI ", std::hex, pciSegment, ":", (unsigned)pciBus, ":", + (unsigned)pciDevice, ".", (unsigned)pciFunction, + " -> NUMA node ", std::dec, proximityDomain); + } + + ptr += length; + } + + DBG(2, "SRAT parsing complete, found ", pciToNuma.size(), " PCI device entries"); +} + #elif __APPLE__ -PciHandle::PciHandle(uint32 groupnr_, uint32 bus_, uint32 device_, uint32 function_) : +PciHandle::PciHandle(uint32, uint32 bus_, uint32 device_, uint32 function_) : fd(-1), bus(bus_), device(device_), - function(function_) + function(function_), + numaNode(-1) { } +int32 PciHandle::getNUMANode() const +{ + return numaNode; +} + bool PciHandle::exists(uint32 groupnr_, uint32 bus_, uint32 device_, uint32 function_) { if (groupnr_ != 0) @@ -251,12 +557,95 @@ PciHandle::~PciHandle() #elif defined (__FreeBSD__) || defined(__DragonFly__) +// Helper function to compute NUMA node for FreeBSD +static int32 getNUMANodeFreeBSD(uint32 groupnr, uint32 bus, uint32 device, uint32 function) +{ + // FreeBSD implementation: try to query NUMA domain information via sysctl + // Return -1 if not available or on error + +#if defined(__FreeBSD__) || defined(__DragonFly__) + // First check if NUMA is enabled on this system + int ndomains = 0; + size_t len = sizeof(ndomains); + + if (sysctlbyname("vm.ndomains", &ndomains, &len, 
nullptr, 0) == 0) + { + if (ndomains <= 1) + { + // NUMA not enabled or single domain system + DBG(3, "NUMA not enabled on FreeBSD (vm.ndomains = ", ndomains, ")"); + return -1; + } + } + else + { + DBG(2, "Cannot query vm.ndomains, assuming NUMA not available"); + return -1; + } + + // Try platform-specific sysctl path for PCI device NUMA domain + // Note: This is not standardized across FreeBSD versions + // Buffer size: "hw.pci." (7) + max domain (10) + "." (1) + max bus (10) + "." (1) + + // max device (10) + "." (1) + max function (10) + ".numa_domain" (12) + null (1) = 63 + // Use 128 to be safe + constexpr size_t SYSCTL_PATH_MAX = 128; + char sysctl_path[SYSCTL_PATH_MAX]; + int ret; + + ret = snprintf(sysctl_path, sizeof(sysctl_path), + "hw.pci.%u.%u.%u.%u.numa_domain", + groupnr, bus, device, function); + + if (ret < 0 || ret >= (int)sizeof(sysctl_path)) + { + DBG(2, "sysctl path truncated or error for PCI device ", + std::hex, groupnr, ":", bus, ":", device, ".", function, std::dec); + return -1; + } + + int numa_node = -1; + len = sizeof(numa_node); + + if (sysctlbyname(sysctl_path, &numa_node, &len, nullptr, 0) == 0) + { + DBG(3, "Found NUMA node ", numa_node, " for PCI device ", + std::hex, groupnr, ":", bus, ":", device, ".", function, std::dec); + return numa_node; + } + + // Try alternative sysctl format with colon separators + ret = snprintf(sysctl_path, sizeof(sysctl_path), + "hw.pci.%u:%u:%u.%u.numa_domain", + groupnr, bus, device, function); + + if (ret < 0 || ret >= (int)sizeof(sysctl_path)) + { + DBG(2, "sysctl path truncated or error for PCI device ", + std::hex, groupnr, ":", bus, ":", device, ".", function, std::dec); + return -1; + } + + if (sysctlbyname(sysctl_path, &numa_node, &len, nullptr, 0) == 0) + { + DBG(3, "Found NUMA node ", numa_node, " for PCI device ", + std::hex, groupnr, ":", bus, ":", device, ".", function, std::dec); + return numa_node; + } + + DBG(2, "NUMA node information not available for PCI device ", + std::hex, 
groupnr, ":", bus, ":", device, ".", function, std::dec); +#endif + + return -1; +} + PciHandle::PciHandle(uint32 groupnr_, uint32 bus_, uint32 device_, uint32 function_) : fd(-1), groupnr(groupnr_), bus(bus_), device(device_), - function(function_) + function(function_), + numaNode(-1) { int handle = ::open("/dev/pci", O_RDWR | O_NOFOLLOW); if (handle < 0) { @@ -266,6 +655,14 @@ PciHandle::PciHandle(uint32 groupnr_, uint32 bus_, uint32 device_, uint32 functi throw std::exception(); } fd = handle; + + // Initialize NUMA node during construction + numaNode = getNUMANodeFreeBSD(groupnr, bus, device, function); +} + +int32 PciHandle::getNUMANode() const +{ + return numaNode; } bool PciHandle::exists(uint32 groupnr_, uint32 bus_, uint32 device_, uint32 function_) @@ -387,6 +784,44 @@ PciHandle::~PciHandle() // Linux implementation +// Helper function to retrieve NUMA node for a PCI device +int32 getNUMANodeLinux(uint32 groupnr, uint32 bus, uint32 device, uint32 function) +{ + std::ostringstream path; + path << std::hex << "/sys/bus/pci/devices/" + << std::setw(4) << std::setfill('0') << groupnr << ":" + << std::setw(2) << std::setfill('0') << bus << ":" + << std::setw(2) << std::setfill('0') << device << "." 
+ << function << "/numa_node"; + + std::string numa_path = path.str(); + std::ifstream numa_file(numa_path); + if (!numa_file.is_open()) + { + // Try alternative path with /pcm prefix (follows existing codebase pattern + // for containerized or chroot environments where sysfs may be mounted under /pcm) + numa_file.open("/pcm" + numa_path); + if (!numa_file.is_open()) + { + DBG(2, "Cannot open NUMA node file: ", numa_path); + return -1; + } + } + + int32 numa_node = -1; + numa_file >> numa_node; + + DBG(3, "NUMA node for ", std::hex, std::setw(4), std::setfill('0'), groupnr, ":", + std::setw(2), bus, ":", std::setw(2), device, ".", function, std::dec, " is ", numa_node); + + if (numa_node == -1) + { + // No NUMA -> map to NUMA node 0 + numa_node = 0; + } + + return numa_node; +} int openHandle(uint32 groupnr_, uint32 bus, uint32 device, uint32 function) { @@ -416,9 +851,11 @@ int openHandle(uint32 groupnr_, uint32 bus, uint32 device, uint32 function) PciHandle::PciHandle(uint32 groupnr_, uint32 bus_, uint32 device_, uint32 function_) : fd(-1), + groupnr(groupnr_), bus(bus_), device(device_), - function(function_) + function(function_), + numaNode(-1) { int handle = openHandle(groupnr_, bus_, device_, function_); if (handle < 0) @@ -427,10 +864,18 @@ PciHandle::PciHandle(uint32 groupnr_, uint32 bus_, uint32 device_, uint32 functi + std::to_string(groupnr_) + ":" + std::to_string(bus_) + ":" + std::to_string(device_) + ":" + std::to_string(function_)); } fd = handle; + + // Initialize NUMA node during construction + numaNode = getNUMANodeLinux(groupnr, bus, device, function); // std::cout << "DEBUG: Opened "<< path.str().c_str() << " on handle "<< fd << "\n"; } +int32 PciHandle::getNUMANode() const +{ + return numaNode; +} + bool PciHandle::exists(uint32 groupnr_, uint32 bus_, uint32 device_, uint32 function_) { @@ -557,10 +1002,12 @@ void PciHandleMM::readMCFG() PciHandleMM::PciHandleMM(uint32 groupnr_, uint32 bus_, uint32 device_, uint32 function_) : fd(-1), 
mmapAddr(NULL), + groupnr(groupnr_), bus(bus_), device(device_), function(function_), - base_addr(0) + base_addr(0), + numaNode(-1) { int handle = ::open("/dev/mem", O_RDWR | O_NOFOLLOW); if (handle < 0) { @@ -604,6 +1051,14 @@ PciHandleMM::PciHandleMM(uint32 groupnr_, uint32 bus_, uint32 device_, uint32 fu std::cout << "mmap failed: errno is " << errno << "\n"; throw std::exception(); } + + // Initialize NUMA node during construction + numaNode = getNUMANodeLinux(groupnr, bus, device, function); +} + +int32 PciHandleMM::getNUMANode() const +{ + return numaNode; } bool PciHandleMM::exists(uint32 /*groupnr_*/, uint32 /*bus_*/, uint32 /*device_*/, uint32 /*function_*/) diff --git a/src/pci.h b/src/pci.h index 0007aadd..3b267d41 100644 --- a/src/pci.h +++ b/src/pci.h @@ -39,8 +39,8 @@ class PciHandle int32 fd; #endif -#if defined(__FreeBSD__) || defined(__DragonFly__) - uint32 groupnr; +#if defined(__FreeBSD__) || defined(__DragonFly__) || defined(__linux__) + uint32 groupnr{}; #endif uint32 bus; uint32 device; @@ -48,6 +48,7 @@ class PciHandle #ifdef _MSC_VER DWORD pciAddress; #endif + int32 numaNode; friend class PciHandleMM; @@ -65,10 +66,16 @@ class PciHandle int32 read64(uint64 offset, uint64 * value); + int32 getNUMANode() const; + virtual ~PciHandle(); protected: static int openMcfgTable(); +#ifdef _MSC_VER +public: + static void readMCFGRecords(std::vector& mcfg); +#endif }; #ifdef _MSC_VER @@ -86,10 +93,12 @@ class PciHandleMM int32 fd; char * mmapAddr; + uint32 groupnr; uint32 bus; uint32 device; uint32 function; uint64 base_addr; + int32 numaNode; static MCFGHeader mcfgHeader; static std::vector mcfgRecords; @@ -109,6 +118,8 @@ class PciHandleMM int32 read64(uint64 offset, uint64 * value); + int32 getNUMANode() const; + virtual ~PciHandleMM(); static const std::vector & getMCFGRecords(); @@ -124,19 +135,25 @@ class PciHandleMM #error "Platform not supported" #endif +#ifdef __linux__ +// Helper function to retrieve NUMA node for a PCI device (Linux only) 
+int32 getNUMANodeLinux(uint32 groupnr, uint32 bus, uint32 device, uint32 function); +#endif + template -inline void forAllIntelDevices(F f, int requestedDevice = -1, int requestedFunction = -1) +inline void forAllDevices(F f, const int requestedVendorID = -1, const int requestedDevice = -1, const int requestedFunction = -1) { std::vector mcfg; getMCFGRecords(mcfg); - auto probe = [&f](const uint32 group, const uint32 bus, const uint32 device, const uint32 function) + auto probe = [&f, &requestedVendorID](const uint32 group, const uint32 bus, const uint32 device, const uint32 function) { DBG(3, "Probing " , std::hex , group , ":" , bus , ":" , device , ":" , function , " " , std::dec); uint32 value = 0; try { PciHandleType h(group, bus, device, function); + DBG(3, "NUMA node: ", h.getNUMANode()); h.read32(0, &value); } catch(...) @@ -147,7 +164,7 @@ inline void forAllIntelDevices(F f, int requestedDevice = -1, int requestedFunct const uint32 vendor_id = value & 0xffff; const uint32 device_id = (value >> 16) & 0xffff; DBG(3, "Found dev " , std::hex , vendor_id , ":" , device_id , std::dec); - if (vendor_id != PCM_INTEL_PCI_VENDOR_ID) + if (requestedVendorID >= 0 && int(vendor_id) != requestedVendorID) { return; } @@ -189,6 +206,12 @@ inline void forAllIntelDevices(F f, int requestedDevice = -1, int requestedFunct } } +template +inline void forAllIntelDevices(F f, int requestedDevice = -1, int requestedFunction = -1) +{ + forAllDevices(f, PCM_INTEL_PCI_VENDOR_ID, requestedDevice, requestedFunction); +} + union VSEC { struct { uint64 cap_id:16; @@ -215,6 +238,8 @@ void processDVSEC(MatchFunc matchFunc, ProcessFunc processFunc) DBG(2, "Intel device scan.found " , std::hex , group , ":" , bus , " : " , device , " : " , function , " " , device_id); uint32 status{0}; PciHandleType h(group, bus, device, function); + const auto NUMANode = h.getNUMANode(); + DBG(2, "NUMA node: ", NUMANode); h.read32(4, &status); // read status if (status & 0x100000) // has capability list { 
@@ -248,7 +273,7 @@ void processDVSEC(MatchFunc matchFunc, ProcessFunc processFunc) if (type == 0) // 32-bit address { bar &= ~0xfull; - processFunc(bar, header); + processFunc(bar, header, NUMANode); } else if (type == 2) // 64-bit address { @@ -258,7 +283,7 @@ void processDVSEC(MatchFunc matchFunc, ProcessFunc processFunc) uint64 full_bar = (uint64(bar_high) << 32) | uint64(bar); full_bar &= ~0xfull; DBG(2, " full_bar = 0x", std::hex, full_bar, std::dec); - processFunc(full_bar, header); + processFunc(full_bar, header, NUMANode); } else { diff --git a/src/pcm-msr.cpp b/src/pcm-msr.cpp index 6a40cf36..761b4de3 100644 --- a/src/pcm-msr.cpp +++ b/src/pcm-msr.cpp @@ -18,6 +18,9 @@ #endif #include #include +#ifndef _MSC_VER +#include +#endif #include using namespace pcm; @@ -131,8 +134,22 @@ int mainThrows(int argc, char * argv[]) #endif if (outfile.length() > 0){ outflag = true; - ofile = fopen(outfile.c_str(),"w"); - if (ofile==NULL){ +#ifdef _MSC_VER + // SDL330: Check for symlink/reparse point before opening (Windows) + DWORD attrs = GetFileAttributesA(outfile.c_str()); + if (attrs != INVALID_FILE_ATTRIBUTES && (attrs & FILE_ATTRIBUTE_REPARSE_POINT)) { + printf("SDL330 ERROR: Symlink/reparse point detected at '%s' (skipping write)\n", outfile.c_str()); + outflag = false; + } else { + ofile = fopen(outfile.c_str(),"w"); + } +#else + // SDL330: Use O_NOFOLLOW to reject symlinks + int fd = open(outfile.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_NOFOLLOW, 0644); + ofile = (fd >= 0) ? fdopen(fd, "w") : NULL; + if (fd >= 0 && !ofile) close(fd); +#endif + if (outflag && ofile==NULL){ printf("ERROR: can not open '%s' (skipping write)\n",outfile.c_str()); printf(" (maybe a sudo issue .. 
need o+rwx on directory)\n"); outflag = false; diff --git a/src/pcm-pcicfg.cpp b/src/pcm-pcicfg.cpp index a991ab74..25da0b18 100644 --- a/src/pcm-pcicfg.cpp +++ b/src/pcm-pcicfg.cpp @@ -3,6 +3,7 @@ // written by Roman Dementiev #include "cpucounters.h" +#include "lspci.h" #ifdef _MSC_VER #include #include "windows/windriver.h" @@ -21,12 +22,16 @@ using namespace pcm; void print_usage(const char * progname) { - std::cout << "Usage " << progname << " [-w value] [-d] [-i ID] [group bus device function] offset\n\n"; + std::cout << "Usage " << progname << " [-w value] [-d] [-n] [-i ID] [group bus device function] offset\n"; + std::cout << " " << progname << " -l [-v] [-d]\n\n"; std::cout << " Reads/writes 32-bit PCICFG register \n"; std::cout << " -w value : write the value before reading \n"; std::cout << " -b low:high : read or write only low..high bits of the register\n"; std::cout << " -d : output all numbers in dec (default is hex)\n"; + std::cout << " -n : print NUMA node of the device\n"; std::cout << " -i ID : specify Intel device ID instead of group bus device function\n"; + std::cout << " -l : list all PCI devices (similar to lspci)\n"; + std::cout << " -v : increase verbosity (can be used multiple times with -l)\n"; std::cout << " --version : print application version\n"; std::cout << "\n"; } @@ -51,11 +56,14 @@ int mainThrows(int argc, char * argv[]) uint32 value = 0; bool write = false; bool dec = false; + bool print_numa = false; + bool list_devices = false; + int verbosity = 0; uint32 deviceID = 0; std::pair bits{-1, -1}; int my_opt = -1; - while ((my_opt = getopt(argc, argv, "i:w:db:")) != -1) + while ((my_opt = getopt(argc, argv, "i:w:db:nlv")) != -1) { switch (my_opt) { @@ -72,6 +80,15 @@ int mainThrows(int argc, char * argv[]) case 'd': dec = true; break; + case 'n': + print_numa = true; + break; + case 'l': + list_devices = true; + break; + case 'v': + verbosity++; + break; default: print_usage(argv[0]); return -1; @@ -80,8 +97,12 @@ int 
mainThrows(int argc, char * argv[]) if (optind + ((deviceID)?0:4) >= argc) { - print_usage(argv[0]); - return -1; + // Allow -l option without additional arguments + if (!list_devices) + { + print_usage(argv[0]); + return -1; + } } int group = -1; @@ -105,12 +126,132 @@ int mainThrows(int argc, char * argv[]) } #endif - auto one = [&dec,&write,&bits](const uint32 & group, const uint32 & bus, const uint32 & device, const uint32 & function, const uint32 & offset, uint32 value) + // Handle list devices mode + if (list_devices) + { + // Load PCI database for device name lookups if verbosity is 1 or higher + PCIDB pciDB; + if (verbosity >= 1) + { + load_PCIDB(pciDB); + } + + // List all PCI devices + forAllDevices([&dec, &verbosity, &pciDB](const uint32 group, const uint32 bus, const uint32 device, const uint32 function, const uint32 device_id) + { + if (PciHandleType::exists(group, bus, device, function) == false) + { + return; + } + uint32 vendor_id = 0; + uint32 dev_id = 0; + int32 numa_node = -1; + + try + { + PciHandleType h(group, bus, device, function); + uint32 value = 0; + h.read32(0, &value); + vendor_id = value & 0xffff; + dev_id = (value >> 16) & 0xffff; + numa_node = h.getNUMANode(); + } + catch (...) + { + DBG(2, "Error accessing PCI device ", std::hex, group, ":", bus, ":", device, ".", function); + return; + } + // Basic format: segment:bus:device.function + if (dec) + { + std::cout << std::dec << std::setfill('0') << std::setw(4) << group << ":" + << std::setfill('0') << std::setw(2) << bus << ":" + << std::setfill('0') << std::setw(2) << device << "." + << function; + } + else + { + std::cout << std::hex << std::setfill('0') << std::setw(4) << group << ":" + << std::setfill('0') << std::setw(2) << bus << ":" + << std::setfill('0') << std::setw(2) << device << "." 
+ << function; + } + + if (dec) + { + std::cout << " " << std::dec << vendor_id << ":" << dev_id; + } + else + { + std::cout << " " << std::hex << std::setfill('0') << std::setw(4) << vendor_id + << ":" << std::setfill('0') << std::setw(4) << dev_id; + } + + // Add device names with verbosity level 1 or higher + if (verbosity >= 1 && !pciDB.first.empty()) + { + auto vendor_it = pciDB.first.find(vendor_id); + if (vendor_it != pciDB.first.end()) + { + std::cout << " " << vendor_it->second; + + auto device_map_it = pciDB.second.find(vendor_id); + if (device_map_it != pciDB.second.end()) + { + auto device_it = device_map_it->second.find(dev_id); + if (device_it != device_map_it->second.end()) + { + std::cout << " - " << device_it->second; + } + } + } + } + + // Add NUMA node information + if (verbosity >= 2) + { + std::cout << " (NUMA node: "; + if (numa_node >= 0) + { + std::cout << std::dec << numa_node; + } + else + { + std::cout << "n/a"; + } + std::cout << ")"; + } + + std::cout << "\n"; + }); + + return 0; + } + + auto one = [&dec,&write,&bits,&print_numa](const uint32 & group, const uint32 & bus, const uint32 & device, const uint32 & function, const uint32 & offset, uint32 value) { try { PciHandleType h(group, bus, device, function); if (!dec) std::cout << std::hex << std::showbase; + + // Print NUMA node if requested + if (print_numa) + { + int32 numa_node = h.getNUMANode(); + std::cout << " NUMA node: "; + if (numa_node >= 0) + { + std::cout << std::dec << numa_node; + } + else + { + std::cout << "not available"; + } + std::cout << " for " << group << ":" << bus << ":" << device << ":" << function << "\n"; + if (!dec) std::cout << std::hex << std::showbase; + } + readOldValueHelper(bits, value, write, [&h, &offset](uint32 & old_value){ h.read32(offset, &old_value); return true; }); if (write) { diff --git a/src/pcm-power.cpp b/src/pcm-power.cpp index cb3cc610..2af6b956 100644 --- a/src/pcm-power.cpp +++ b/src/pcm-power.cpp @@ -109,14 +109,17 @@ namespace 
PERF_LIMIT_REASON_TPMI { bool PERF_LIMIT_REASON_TPMI_Supported = false; auto numTPMIInstances = TPMIHandle::getNumInstances(); + DBG(1, "TPMI instances detected: ", numTPMIInstances); for (size_t i = 0; i < numTPMIInstances; ++i) { TPMIHandle h(i, PERF_LIMIT_REASON_TPMI_ID, PERF_LIMIT_REASON_TPMI_HEADER); + DBG(1, "TPMI instance ", i, " has ", h.getNumEntries(), " entries"); for (uint32 j = 0; j < h.getNumEntries(); ++j) { const auto header = h.read64(j); if (header == ~0ULL) { + DBG(1, "TPMI instance ", i, " die ", j, " header read failed"); return false; } const auto version = extract_bits_64(header, 7, 0); @@ -132,6 +135,7 @@ namespace PERF_LIMIT_REASON_TPMI int getMaxPMModuleID(const PCM * m) { int max_pm_module_id = -1; + DBG(1, "Detecting max PM module ID from MSR_PM_LOGICAL_ID. numCores: ", m->getNumCores()); for (unsigned int core = 0; core < m->getNumCores(); ++core) { if (m->isCoreOnline(core) == false) @@ -140,8 +144,15 @@ namespace PERF_LIMIT_REASON_TPMI MsrHandle msr(core); uint64 val = 0; constexpr auto MSR_PM_LOGICAL_ID = 0x54; - msr.read(MSR_PM_LOGICAL_ID, &val); + const auto readSize = msr.read(MSR_PM_LOGICAL_ID, &val); + if (readSize != sizeof(uint64)) + { + std::cerr << "Error reading MSR_PM_LOGICAL_ID (0x54) on core " << core << "\n"; + continue; + } + DBG(2, "Core ", core, " MSR_PM_LOGICAL_ID value: 0x", std::hex, val, std::dec); const auto module_id = (int)extract_bits(val, 10, 3); + DBG(2, "Core ", core, " PM module ID: ", module_id); max_pm_module_id = (std::max)(max_pm_module_id, module_id); } return max_pm_module_id; diff --git a/src/pcm-sensor-server.cpp b/src/pcm-sensor-server.cpp index b060d16b..838a87f7 100644 --- a/src/pcm-sensor-server.cpp +++ b/src/pcm-sensor-server.cpp @@ -3087,7 +3087,7 @@ class HTTPConnection : public Work { response.setProtocol( HTTPProtocol::HTTP_1_1 ); } // Always send a response - response.createResponse( TextPlain, std::string( "400 Bad Request " ) + e.what(), RC_400_BadRequest ); + response.createResponse( 
TextPlain, std::string( "400 Bad Request" ), RC_400_BadRequest ); socketStream_ << response; break; } diff --git a/src/pcm-tpmi.cpp b/src/pcm-tpmi.cpp index 5c66be2d..daf53460 100644 --- a/src/pcm-tpmi.cpp +++ b/src/pcm-tpmi.cpp @@ -111,6 +111,9 @@ int mainThrows(int argc, char * argv[]) try { + PCM::setQuietMode(true); + auto pcmInstance = PCM::getInstance(); + if (instances.empty()) { for (size_t i = 0; i < TPMIHandle::getNumInstances(); ++i) @@ -126,6 +129,32 @@ int mainThrows(int argc, char * argv[]) continue; } TPMIHandle h(i, requestedID, requestedRelativeOffset, !write); + + // Get NUMA node and socket ID + int32 numaNode = h.getNUMANode(); + int32 socketId = -1; + if (numaNode >= 0) + { + socketId = pcmInstance->mapNUMANodeToSocket(static_cast(numaNode)); + } + + // Helper lambda to print socket ID and NUMA node information + auto printTopologyInfo = [&socketId, &numaNode]() { + if (socketId >= 0 || numaNode >= 0) + { + // Save stream format state + auto flags = std::cout.flags(); + + if (socketId >= 0) + std::cout << " (socket " << std::dec << socketId << ")"; + if (numaNode >= 0) + std::cout << " (NUMA node " << std::dec << numaNode << ")"; + + // Restore stream format state + std::cout.flags(flags); + } + }; + auto one = [&](const size_t p, uint64 value) { if (!dec) @@ -134,12 +163,16 @@ int mainThrows(int argc, char * argv[]) { old_value = h.read64(p); return true; }); if (write) { - std::cout << " Writing " << value << " to TPMI ID " << requestedID << "@" << requestedRelativeOffset << " for entry " << p << " in instance " << i << "\n"; + std::cout << " Writing " << value << " to TPMI ID " << requestedID << "@" << requestedRelativeOffset << " for entry " << p << " in instance " << i; + printTopologyInfo(); + std::cout << "\n"; h.write64(p, value); } value = h.read64(p); extractBitsPrintHelper(bits, value, dec); - std::cout << " from TPMI ID " << requestedID << "@" << requestedRelativeOffset << " for entry " << p << " in instance " << i << "\n\n"; + 
std::cout << " from TPMI ID " << requestedID << "@" << requestedRelativeOffset << " for entry " << p << " in instance " << i; + printTopologyInfo(); + std::cout << "\n\n"; }; if (entries.empty()) { diff --git a/src/pcm.cpp b/src/pcm.cpp index ba708be7..95bd948e 100644 --- a/src/pcm.cpp +++ b/src/pcm.cpp @@ -554,6 +554,7 @@ void print_output(PCM * m, } }; const std::vector uncoreDieTypes{getUncoreDieTypes(sktstate2[0])}; + DBG(2, " Uncore die types count: ", uncoreDieTypes.size()); if (uncoreDieTypes.empty() == false) { cout << setNextColor() << " Unc(Ghz) "; diff --git a/src/tpmi.cpp b/src/tpmi.cpp index 81c6b129..c3dd837f 100644 --- a/src/tpmi.cpp +++ b/src/tpmi.cpp @@ -6,6 +6,7 @@ #include "tpmi.h" #include "pci.h" #include "utils.h" +#include "debug.h" #include #include #include @@ -24,8 +25,12 @@ class PFSInstances public: // [TPMI ID][entry] -> base address typedef std::unordered_map > PFSMapType; - // [PFS instance][TPMI ID][entry] -> base address - typedef std::vector PFSInstancesType; + struct PFSInstance + { + PFSMapType pfsMap{}; // [TPMI ID][entry] -> base address + int32 NUMANode{-1}; + }; + typedef std::vector PFSInstancesType; private: static std::shared_ptr PFSInstancesSingleton; public: @@ -42,8 +47,10 @@ class PFSInstances { return vsec.fields.cap_id == 0xb // Vendor Specific DVSEC && vsec.fields.vsec_id == 0x42; // TPMI PM_Features - }, [&](const uint64 bar, const VSEC & vsec) + }, [&](const uint64 bar, const VSEC & vsec, const int32 NUMANode) { + DBG(1, "TPMI detection. 
Bar 0x", std::hex, bar, std::dec, + " NUMANode: ", NUMANode); struct PFS { uint64 TPMI_ID:8; @@ -63,7 +70,8 @@ class PFSInstances std::cerr << "Can't read PFS\n"; std::cerr << e.what(); } - PFSInstancesSingletonInit->push_back(PFSMapType()); + PFSInstancesSingletonInit->push_back(PFSInstance()); + PFSInstancesSingletonInit->back().NUMANode = NUMANode; for (const auto & pfs : pfsArray) { if (TPMIverbose) @@ -76,6 +84,12 @@ class PFSInstances "\t Attribute: " << pfs.Attribute << "\n"; } + DBG(1, " PFS TPMI_ID: ", pfs.TPMI_ID, + " NumEntries: ", pfs.NumEntries, + " EntrySize: ", pfs.EntrySize, + " CapOffset: ", pfs.CapOffset, + " Attribute: ", pfs.Attribute); + for (uint64 p = 0; p < pfs.NumEntries; ++p) { uint32 reg0 = 0; @@ -89,7 +103,8 @@ class PFSInstances std::cout << "can't read entry " << p << "\n"; std::cout << e.what(); } - PFSInstancesSingletonInit->back()[pfs.TPMI_ID].push_back(addr); + DBG(2, "can't read entry ", p, " error: ", e.what()); + PFSInstancesSingletonInit->back().pfsMap[pfs.TPMI_ID].push_back(addr); continue; } if (reg0 == TPMIInvalidValue) @@ -98,6 +113,7 @@ class PFSInstances { std::cout << "invalid entry " << p << "\n"; } + DBG(2, "invalid entry ", p); } else { @@ -112,7 +128,14 @@ class PFSInstances } std::cout << std::dec << "\n"; } - PFSInstancesSingletonInit->back()[pfs.TPMI_ID].push_back(addr); + DBG(2, "valid entry ", p); + for (uint64 i_offset = 0; i_offset < pfs.EntrySize * sizeof(uint32); i_offset += sizeof(uint64)) + { + uint64 reg = 0; + mmio_memcpy(®, addr + i_offset, sizeof(uint64), false); + DBG(2, " register 0x", std::hex , i_offset, " = 0x", reg, std::dec); + } + PFSInstancesSingletonInit->back().pfsMap[pfs.TPMI_ID].push_back(addr); } } } @@ -134,6 +157,7 @@ class TPMIHandleMMIO : public TPMIHandleInterface size_t offset; }; std::vector entries; + int32 numaNode{-1}; public: static size_t getNumInstances(); static void setVerbose(const bool); @@ -144,6 +168,10 @@ class TPMIHandleMMIO : public TPMIHandleInterface } uint64 
read64(size_t entryPos) override; void write64(size_t entryPos, uint64 val) override; + int32 getNUMANode() override + { + return numaNode; + } }; size_t TPMIHandleMMIO::getNumInstances() @@ -160,7 +188,8 @@ TPMIHandleMMIO::TPMIHandleMMIO(const size_t instance_, const size_t ID_, const s { auto & pfsInstances = PFSInstances::get(); assert(instance_ < pfsInstances.size()); - for (const auto & addr: pfsInstances[instance_][ID_]) + numaNode = pfsInstances[instance_].NUMANode; + for (const auto & addr: pfsInstances[instance_].pfsMap[ID_]) { const auto requestedAddr = addr + requestedRelativeOffset; const auto baseAddr = roundDownTo4K(requestedAddr); @@ -185,6 +214,9 @@ void TPMIHandleMMIO::write64(size_t entryPos, uint64 val) } #ifdef __linux__ + +int32 getNUMANodeLinux(uint32 groupnr, uint32 bus, uint32 device, uint32 function); + class TPMIHandleDriver : public TPMIHandleInterface { TPMIHandleDriver(const TPMIHandleDriver&) = delete; @@ -197,6 +229,7 @@ class TPMIHandleDriver : public TPMIHandleInterface const size_t instance; const size_t ID; const size_t offset; + int32 numaNode{ -1 }; // const bool readonly; // not used size_t nentries; struct TPMIEntry { @@ -280,7 +313,8 @@ class TPMIHandleDriver : public TPMIHandleInterface { assert(available > 0); assert(instance < getNumInstances()); - const auto entries = readTPMIFile(AllIDPaths[instance][ID]); + const auto path = AllIDPaths[instance][ID]; + const auto entries = readTPMIFile(path); for (auto & e: entries) { if (e.data.empty() == false && e.data[0] != TPMIInvalidValue) @@ -289,6 +323,30 @@ class TPMIHandleDriver : public TPMIHandleInterface ++nentries; } } + // path is like /sys/kernel/debug/tpmi-0000:80:03.1/tpmi-id-0a + // extract the 0000:80:03.1 part: + const auto prefix = std::string("/sys/kernel/debug/tpmi-"); + const auto startPos = path.find(prefix); + assert(startPos != std::string::npos); + const auto endPos = path.find("/tpmi-id-"); + assert(endPos != std::string::npos); + const auto pciAddress = 
path.substr(startPos + prefix.size(), endPos - (startPos + prefix.size())); + DBG(2, "TPMIHandleDriver: PCI address: ", pciAddress); + std::istringstream iss(pciAddress); + uint32 segment = 0; + char separator{}; + uint32 bus = 0; + uint32 device = 0; + uint32 function = 0; + iss >> std::hex >> segment >> separator >> std::hex >> bus >> separator >> std::hex; + iss >> std::hex >> device; + iss >> separator >> std::hex >> function; + DBG(2, "TPMIHandleDriver: segment=", segment, + " bus=", bus, + " device=", device, + " function=", function); + numaNode = getNUMANodeLinux(segment, bus, device, function); + DBG(2, "TPMIHandleDriver: NUMA node: ", numaNode); } size_t getNumEntries() const override { @@ -320,6 +378,10 @@ class TPMIHandleDriver : public TPMIHandleInterface writeSysFS(path.c_str(), std::to_string(i) + "," + std::to_string(offset) + "," + std::to_string(out.ui32.low)); writeSysFS(path.c_str(), std::to_string(i) + "," + std::to_string(offset + 4) + "," + std::to_string(out.ui32.high)); } + int32 getNUMANode() override + { + return numaNode; + } }; int TPMIHandleDriver::available = -1; @@ -419,5 +481,10 @@ void TPMIHandle::write64(size_t entryPos, uint64 val) impl->write64(entryPos, val); } +int32 TPMIHandle::getNUMANode() +{ + assert(impl.get()); + return impl->getNUMANode(); +} } // namespace pcm diff --git a/src/tpmi.h b/src/tpmi.h index ab819dec..09223f26 100644 --- a/src/tpmi.h +++ b/src/tpmi.h @@ -21,6 +21,7 @@ class TPMIHandleInterface virtual size_t getNumEntries() const = 0; virtual uint64 read64(size_t entryPos) = 0; virtual void write64(size_t entryPos, uint64 val) = 0; + virtual int32 getNUMANode() = 0; virtual ~TPMIHandleInterface() {} }; @@ -36,6 +37,7 @@ class TPMIHandle : public TPMIHandleInterface size_t getNumEntries() const override; uint64 read64(size_t entryPos) override; void write64(size_t entryPos, uint64 val) override; + int32 getNUMANode() override; }; } // namespace pcm diff --git a/src/uncore_pmu_discovery.cpp 
b/src/uncore_pmu_discovery.cpp index bf569070..9cbc6f94 100644 --- a/src/uncore_pmu_discovery.cpp +++ b/src/uncore_pmu_discovery.cpp @@ -6,10 +6,11 @@ #include "mmio.h" #include "iostream" #include "utils.h" +#include "cpucounters.h" namespace pcm { -UncorePMUDiscovery::UncorePMUDiscovery() +UncorePMUDiscovery::UncorePMUDiscovery(PCM & m) { if (safe_getenv("PCM_NO_UNCORE_PMU_DISCOVERY") == std::string("1")) { @@ -17,12 +18,11 @@ UncorePMUDiscovery::UncorePMUDiscovery() } const auto debug = (safe_getenv("PCM_DEBUG_PMU_DISCOVERY") == std::string("1")); - size_t socket = 0; - - auto processTables = [this, &debug, &socket](const uint64 bar, const VSEC & vsec) + auto processTables = [this, &debug, &m](const uint64 bar, const VSEC & vsec, const int32 NUMANode) { try { - DBG(2, "Uncore discovery detection. Reading from bar 0x", std::hex, bar, std::dec); + DBG(1, "Uncore discovery detection. Reading from bar 0x", std::hex, bar, std::dec, + " NUMANode: ", NUMANode); constexpr size_t UncoreDiscoverySize = 3UL; union UncoreGlobalDiscovery { GlobalPMU pmu; @@ -30,7 +30,18 @@ UncorePMUDiscovery::UncorePMUDiscovery() }; UncoreGlobalDiscovery global; mmio_memcpy(global.table, bar, UncoreDiscoverySize * sizeof(uint64), true); - globalPMUs.resize(socket + 1); + size_t socket = 0; // default socket if NUMA node -> socket mapping fails + if (NUMANode >= 0) + { + const auto socketFromNUMANode = m.mapNUMANodeToSocket(NUMANode); + DBG(1, "Socket of NUMANode: ", socketFromNUMANode); + if (socketFromNUMANode >= 0) + { + socket = static_cast(socketFromNUMANode); + } + } + globalPMUs.resize((std::max)(socket + 1, globalPMUs.size())); + assert(socket < globalPMUs.size()); globalPMUs[socket].push_back(global.pmu); if (debug) { @@ -66,9 +77,9 @@ UncorePMUDiscovery::UncorePMUDiscovery() // unit.pmu.print(); boxPMUMap[unit.pmu.boxType].push_back(unit.pmu); } - boxPMUs.resize(socket + 1); + boxPMUs.resize((std::max)(socket + 1, boxPMUs.size())); + assert(socket < boxPMUs.size()); 
boxPMUs[socket].push_back(boxPMUMap); - ++socket; } catch (const std::exception & e) { @@ -109,6 +120,7 @@ UncorePMUDiscovery::UncorePMUDiscovery() { std::cout << "Socket " << s << " die " << die << " global PMU:\n"; std::cout << " "; + assert(s < globalPMUs.size() && die < globalPMUs[s].size()); globalPMUs[s][die].print(); std::cout << "Socket " << s << " die " << die << " unit PMUs:\n"; for (const auto& pmuType : boxPMUs[s][die]) diff --git a/src/uncore_pmu_discovery.h b/src/uncore_pmu_discovery.h index e970eaa9..7c42a283 100644 --- a/src/uncore_pmu_discovery.h +++ b/src/uncore_pmu_discovery.h @@ -10,6 +10,8 @@ namespace pcm { +class PCM; + constexpr auto SPR_PCU_BOX_TYPE = 4U; constexpr auto SPR_IMC_BOX_TYPE = 6U; constexpr auto SPR_UPILL_BOX_TYPE = 8U; @@ -168,8 +170,9 @@ class UncorePMUDiscovery } return 0; } + UncorePMUDiscovery() = delete; public: - UncorePMUDiscovery(); + UncorePMUDiscovery(PCM &); size_t getNumDies(const size_t socket) const { diff --git a/src/utils.cpp b/src/utils.cpp index 64aac699..18566fd3 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -64,12 +64,14 @@ void getMCFGRecords(std::vector<MCFGRecord>& mcfg) { #ifdef __linux__ mcfg = PciHandleMM::getMCFGRecords(); +#elif defined(_MSC_VER) + PciHandle::readMCFGRecords(mcfg); #else MCFGRecord segment; segment.startBusNumber = 0; segment.endBusNumber = 0xff; auto maxSegments = 1; -#if defined (_MSC_VER) || defined(__FreeBSD__) || defined(__DragonFly__) +#if defined(__FreeBSD__) || defined(__DragonFly__) switch (PCM::getCPUFamilyModelFromCPUID()) { case PCM::SPR: diff --git a/src/utils.h b/src/utils.h index 6873ceed..7b988729 100644 --- a/src/utils.h +++ b/src/utils.h @@ -87,7 +87,10 @@ namespace pcm { } #ifdef _MSC_VER -#define PCM_SET_DLL_DIR SetDllDirectory(_T("")); +// Security hardening: remove the current working directory from the DLL search +// order to prevent DLL planting attacks (CWE-427). This ensures DLLs are only +// loaded from trusted system directories. 
+#define PCM_SET_DLL_DIR SetDllDirectory(_T("")); SetDefaultDllDirectories(LOAD_LIBRARY_SEARCH_SYSTEM32); #else #define PCM_SET_DLL_DIR #endif diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 193045a4..528f0d16 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -11,6 +11,16 @@ if(UNIX) add_executable(daemon_alignment_test ${TEST_FILE}) target_link_libraries(daemon_alignment_test) + if(NOT APPLE) + # numa_to_socket_test + add_executable(numa_to_socket_test numa_to_socket_test.cpp) + target_link_libraries(numa_to_socket_test Threads::Threads PCM_STATIC) + + # cache_verification_test + add_executable(cache_verification_test cache_verification_test.cpp) + target_link_libraries(cache_verification_test Threads::Threads PCM_STATIC) + endif() + # PCM_STATIC + pcm_sensor = urltest if(LINUX) add_executable(urltest urltest.cpp) @@ -42,4 +52,4 @@ if(PCM_FUZZ) target_link_libraries(pcm-sensor-server-ssl-fuzz Threads::Threads PCM_STATIC ${SSL_LIBS}) target_link_libraries(pcm-fuzz Threads::Threads PCM_STATIC) target_link_libraries(pcm-memory-fuzz Threads::Threads PCM_STATIC) -endif() \ No newline at end of file +endif() diff --git a/tests/cache_verification_test.cpp b/tests/cache_verification_test.cpp new file mode 100644 index 00000000..88a55714 --- /dev/null +++ b/tests/cache_verification_test.cpp @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2025, Intel Corporation +// Test program to verify cache functionality in mapNUMANodeToSocket() + +#include <iostream> +#include <chrono> +#include "../src/cpucounters.h" + +using namespace pcm; +using namespace std::chrono; + +int main() +{ + std::cout << "Testing mapNUMANodeToSocket() cache functionality\n"; + std::cout << "================================================\n\n"; + + PCM * m = PCM::getInstance(); + + if (m->program() != PCM::Success) + { + std::cout << "Note: Cannot access CPU counters (expected on non-Intel or without root)\n"; + std::cout << "This test will still verify that the 
cache mechanism compiles correctly.\n"; + std::cout << "Cache verification test PASSED (compilation check)\n"; + return 0; + } + + std::cout << "PCM initialized successfully\n"; + std::cout << "Testing cache performance...\n\n"; + + // Test NUMA node 0 multiple times to measure caching effect + const int iterations = 100; + + // First call - should compute and cache + auto start = high_resolution_clock::now(); + int32 result1 = m->mapNUMANodeToSocket(0); + auto end = high_resolution_clock::now(); + auto first_duration = duration_cast<microseconds>(end - start).count(); + + std::cout << "First call (cache miss) for NUMA node 0: " << result1 + << " (took " << first_duration << " microseconds)\n"; + + // Subsequent calls - should use cache + start = high_resolution_clock::now(); + for (int i = 0; i < iterations; ++i) { + int32 result = m->mapNUMANodeToSocket(0); + if (result != result1) { + std::cerr << "ERROR: Inconsistent results from cache!\n"; + return 1; + } + } + end = high_resolution_clock::now(); + auto cached_duration = duration_cast<microseconds>(end - start).count(); + double avg_cached = static_cast<double>(cached_duration) / iterations; + + std::cout << "Average time for " << iterations << " cached calls: " + << avg_cached << " microseconds per call\n"; + + // Cache should be significantly faster (at least 2x) + if (first_duration > 0 && avg_cached > 0) { + double speedup = static_cast<double>(first_duration) / avg_cached; + std::cout << "\nSpeedup from caching: " << speedup << "x\n"; + + if (speedup > 2.0) { + std::cout << "\nCache is working effectively!\n"; + } else { + std::cout << "\nWarning: Cache speedup is less than expected, but this may be normal on some systems.\n"; + } + } + + // Test multiple NUMA nodes to verify cache stores multiple entries + std::cout << "\nTesting multiple NUMA nodes (0-7):\n"; + for (uint32 node = 0; node < 8; ++node) { + int32 socket = m->mapNUMANodeToSocket(node); + std::cout << " NUMA node " << node << " -> Socket " << socket << "\n"; + } + + // Test NUMA node 
> 255 (now also cached) + std::cout << "\nTesting NUMA node 256 (now also cached):\n"; + int32 result_256 = m->mapNUMANodeToSocket(256); + std::cout << " NUMA node 256 -> Socket " << result_256 << "\n"; + + std::cout << "\nCache verification test PASSED\n"; + + m->cleanup(); + return 0; +} diff --git a/tests/numa_test.cpp b/tests/numa_test.cpp new file mode 100644 index 00000000..4aead0ea --- /dev/null +++ b/tests/numa_test.cpp @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2024, Intel Corporation +// Test program for getNUMANode() API + +#include <iostream> +#include <iomanip> +#include "../src/pci.h" + +using namespace pcm; + +int main() +{ + std::cout << "Testing getNUMANode() API\n"; + std::cout << "=========================\n\n"; + + // Note: Testing with PCI device 0:0:0.0 which is commonly the host bridge + // This device may not exist on all systems or may require root permissions to access + std::cout << "Attempting to test with PCI device 0:0:0.0 (host bridge)\n"; + + try + { + // Try to create a PciHandle for a common device + // We'll try bus 0, device 0, function 0 as it often exists + PciHandleType handle(0, 0, 0, 0); + + std::cout << "Successfully created PciHandle for 0:0:0.0\n"; + + // Get NUMA node + int32 numa_node = handle.getNUMANode(); + + std::cout << "NUMA node: " << numa_node; + if (numa_node == -1) + { + std::cout << " (not available or not applicable)"; + } + std::cout << "\n"; + + // Test reading vendor ID to verify the handle works + uint32 vendor_device_id = 0; + if (handle.read32(0, &vendor_device_id) == sizeof(uint32)) + { + uint32 vendor_id = vendor_device_id & 0xFFFF; + uint32 device_id = (vendor_device_id >> 16) & 0xFFFF; + std::cout << "Vendor ID: 0x" << std::hex << std::setw(4) << std::setfill('0') + << vendor_id << ", Device ID: 0x" << device_id << std::dec << "\n"; + } + + std::cout << "\nTest PASSED\n"; + return 0; + } + catch (const std::exception& e) + { + std::cerr << "Exception: " << e.what() << "\n"; + std::cerr 
<< "Note: This is expected if device 0:0:0.0 doesn't exist or you don't have permissions\n"; + std::cerr << "\nTest completed (device not accessible)\n"; + return 0; // Not a failure - just means device doesn't exist + } +} diff --git a/tests/numa_to_socket_test.cpp b/tests/numa_to_socket_test.cpp new file mode 100644 index 00000000..b2395904 --- /dev/null +++ b/tests/numa_to_socket_test.cpp @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2025, Intel Corporation +// Test program for mapNUMANodeToSocket() API + +#include +#include +#include "../src/cpucounters.h" + +using namespace pcm; + +int main() +{ + std::cout << "Testing mapNUMANodeToSocket() API\n"; + std::cout << "==================================\n\n"; + + PCM * m = PCM::getInstance(); + + if (m->program() != PCM::Success) + { + std::cerr << "Error: Cannot access CPU counters\n"; + std::cerr << "Try running as root/administrator\n"; + return 1; + } + + std::cout << "PCM initialized successfully\n"; + std::cout << "Number of sockets: " << m->getNumSockets() << "\n"; + std::cout << "Number of cores: " << m->getNumCores() << "\n\n"; + + // Test mapping for NUMA nodes 0-7 (most systems won't have more) + std::cout << "NUMA Node -> Socket Mapping:\n"; + std::cout << "-----------------------------\n"; + + bool found_valid_mapping = false; + for (uint32 numa_node = 0; numa_node < 8; ++numa_node) + { + int32 socket_id = m->mapNUMANodeToSocket(numa_node); + + if (socket_id >= 0) + { + std::cout << "NUMA node " << numa_node << " -> Socket " << socket_id << "\n"; + found_valid_mapping = true; + } + else + { + // Only show -1 for first few nodes to avoid clutter + if (numa_node < 4) + { + std::cout << "NUMA node " << numa_node << " -> Not available (returned " << socket_id << ")\n"; + } + } + } + + if (!found_valid_mapping) + { + std::cout << "\nNo valid NUMA node mappings found.\n"; + std::cout << "This may be expected on:\n"; + std::cout << " - Single-socket systems\n"; + std::cout << " 
- Systems without NUMA support\n"; + std::cout << " - macOS (not implemented)\n"; + std::cout << " - Systems where NUMA information is not available\n"; + } + + std::cout << "\nTest PASSED\n"; + + m->cleanup(); + return 0; +} diff --git a/tests/test.sh b/tests/test.sh index 82ee3fb4..8e57ad96 100755 --- a/tests/test.sh +++ b/tests/test.sh @@ -131,6 +131,20 @@ if [ "$?" -ne "0" ]; then exit 1 fi +echo Testing pcm-pcicfg with -n option +./pcm-pcicfg -n 0 0 0 0 0 +if [ "$?" -ne "0" ]; then + echo "Error in pcm-pcicfg with -n option" + exit 1 +fi + +echo Testing pcm-pcicfg with -n and -d options +./pcm-pcicfg -n -d 0 0 0 0 0 +if [ "$?" -ne "0" ]; then + echo "Error in pcm-pcicfg with -n -d options" + exit 1 +fi + echo Testing pcm-tpmi ./pcm-tpmi 2 0x10 -d -b 26:26 if [ "$?" -ne "0" ]; then