From 814b7f6510b195d66c48dda44147d42174905cbe Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 3 Dec 2025 14:12:16 +0100 Subject: [PATCH 1/8] add more debug tracing Change-Id: I2fa3dd144ccbbb67350a3468353885fe70474ad6 --- src/cpucounters.cpp | 5 ++++- src/msr.cpp | 6 +++++- src/topologyentry.h | 4 ++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index a285689c..249c9e5e 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -1140,6 +1140,7 @@ bool PCM::discoverSystemTopology() pcm_cpuid(0x1F, subleaf, cpuid_args); domain d; d.type = (TopologyEntry::DomainTypeID)extract_bits_32(cpuid_args.reg.ecx, 8, 15); + DBG(1 , "pcm_cpuid 0x1F cpuid_args.reg.ecx = " , cpuid_args.reg.ecx , " d.type = ", d.type); if (d.type == TopologyEntry::DomainTypeID::InvalidDomainTypeID) { break; @@ -4413,6 +4414,7 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */, MSR[i]->write(IA32_PERF_GLOBAL_OVF_CTRL, value); MSR[i]->write(IA32_CR_PERF_GLOBAL_CTRL, value); + DBG(3, "core_id = ", i, " wrote IA32_PERF_GLOBAL_OVF_CTRL and IA32_CR_PERF_GLOBAL_CTRL = 0x", std::hex, value, std::dec); } #ifdef PCM_USE_PERF else @@ -5679,7 +5681,7 @@ void BasicCounterState::readAndAggregate(std::shared_ptr msr) { { msr->read(IA32_PERF_GLOBAL_STATUS, &overflows); // read overflows - DBG(3, "Debug " , core_id , " IA32_PERF_GLOBAL_STATUS: " , overflows); + DBG(3, "core_id = " , core_id , " IA32_PERF_GLOBAL_STATUS: " , overflows); msr->read(INST_RETIRED_ADDR, &cInstRetiredAny); msr->read(CPU_CLK_UNHALTED_THREAD_ADDR, &cCpuClkUnhaltedThread); @@ -5698,6 +5700,7 @@ void BasicCounterState::readAndAggregate(std::shared_ptr msr) msr->lock(); msr->read(PERF_METRICS_ADDR, &perfMetrics); msr->read(TOPDOWN_SLOTS_ADDR, &slots); + DBG(3, "core_id = " , core_id , " PERF_METRICS = ", perfMetrics, " TOPDOWN_SLOTS = ", slots); msr->write(PERF_METRICS_ADDR, 0); msr->write(TOPDOWN_SLOTS_ADDR, 0); cFrontendBoundSlots = extract_bits(perfMetrics, 16, 23); diff --git a/src/msr.cpp b/src/msr.cpp index 635ee46f..cdbfc433 100644 --- a/src/msr.cpp +++ b/src/msr.cpp @@ -258,13 +258,17 @@ int32 MsrHandle::write(uint64 msr_number, uint64 value) std::cout << "DEBUG: writing MSR 0x" << std::hex << msr_number << " value 0x" << value << " on cpu " << std::dec << cpu_id << std::endl; #endif if (fd < 0) return 0; + DBG(4, "core_id = ", cpu_id, " writing MSR 0x", std::hex, msr_number, " value 0x", value, std::dec); return ::pwrite(fd, (const void *)&value, sizeof(uint64), msr_number); } int32 MsrHandle::read(uint64 msr_number, uint64 * value) { if (fd < 0) return 0; - return ::pread(fd, (void *)value, sizeof(uint64), msr_number); + assert(value); + const auto ret = ::pread(fd, (void *)value, sizeof(uint64), msr_number); + DBG(4, "core_id = ", cpu_id, " reading MSR 0x", std::hex, msr_number, " value 0x", *value, std::dec); + return ret; } #endif diff --git a/src/topologyentry.h b/src/topologyentry.h index 4c94d4ca..ff92f572 100644 --- a/src/topologyentry.h +++ b/src/topologyentry.h @@ -116,11 +116,13 @@ struct PCM_API TopologyEntry // describes a core inline void fillEntry(TopologyEntry & entry, const uint32 & smtMaskWidth, const uint32 & coreMaskWidth, const uint32 & l2CacheMaskShift, const int apic_id) { + DBG(1, "entry.os_id = ", entry.os_id, " apic_id = ", apic_id); entry.thread_id = smtMaskWidth ? extract_bits_32(apic_id, 0, smtMaskWidth - 1) : 0; entry.core_id = (smtMaskWidth + coreMaskWidth) ? extract_bits_32(apic_id, smtMaskWidth, smtMaskWidth + coreMaskWidth - 1) : 0; entry.socket_id = extract_bits_32(apic_id, smtMaskWidth + coreMaskWidth, 31); entry.tile_id = extract_bits_32(apic_id, l2CacheMaskShift, 31); entry.socket_unique_core_id = entry.core_id; + DBG(1, "entry.os_id = ", entry.os_id, " apic_id = ", apic_id, " entry.thread_id = ", entry.thread_id, " entry.core_id = ", entry.core_id, " entry.socket_id = ", entry.socket_id , " entry.tile_id = ", entry.tile_id, " entry.socket_unique_core_id = ", entry.socket_unique_core_id); } inline bool initCoreMasks(uint32 & smtMaskWidth, uint32 & coreMaskWidth, uint32 & l2CacheMaskShift, uint32 & l3CacheMaskShift) @@ -143,6 +145,7 @@ inline bool initCoreMasks(uint32 & smtMaskWidth, uint32 & coreMaskWidth, uint32 } levelType = extract_bits_32(cpuid_args.array[2], 8, 15); levelShift = extract_bits_32(cpuid_args.array[0], 0, 4); + DBG(1, "levelType = ", levelType, " levelShift = ", levelShift); switch (levelType) { case 1: //level type is SMT, so levelShift is the SMT_Mask_Width @@ -251,6 +254,7 @@ inline bool initCoreMasks(uint32 & smtMaskWidth, uint32 & coreMaskWidth, uint32 } #endif } + DBG(1, "smtMaskWidth = ", smtMaskWidth, " coreMaskWidth = ", coreMaskWidth, " l2CacheMaskShift = ", l2CacheMaskShift, " l3CacheMaskShift = ", l3CacheMaskShift); return true; } From 824fca3e07e5f42b698ee1765795c7c9762a6ddb Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 3 Dec 2025 14:13:27 +0100 Subject: [PATCH 2/8] avoid side-effects in DBG print: socketIdMap was modified Change-Id: I3f217431af2ecb076c6b2bbf315b2f25c7a3ef25 --- src/cpucounters.cpp | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 249c9e5e..f5817da5 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -1483,7 +1483,8 @@ bool PCM::discoverSystemTopology() // use map to change apic socket id to the logical socket id for (int i = 0; (i < (int)num_cores) && (!socketIdMap.empty()); ++i) { - DBG(2, "socket_id: ", topology[i].socket_id, ", socketIdMap tells me: ", socketIdMap[topology[i].socket_id]); + DBG(2, "socket_id: ", topology[i].socket_id, ", socketIdMap tells me: ", + (socketIdMap.find(topology[i].socket_id) == socketIdMap.end()) ? (std::string("N/A")): std::to_string(socketIdMap[topology[i].socket_id])); if(isCoreOnline((int32)i)) topology[i].socket_id = socketIdMap[topology[i].socket_id]; } @@ -5715,22 +5716,22 @@ void BasicCounterState::readAndAggregate(std::shared_ptr msr) cHeavyOpsSlots = extract_bits(perfMetrics, 32 + 0*8, 32 + 0*8 + 7); } const double total = double(cFrontendBoundSlots + cBadSpeculationSlots + cBackendBoundSlots + cRetiringSlots); - if (total != 0) + if (true) { - cFrontendBoundSlots = m->FrontendBoundSlots[core_id] += uint64((double(cFrontendBoundSlots) / total) * double(slots)); - cBadSpeculationSlots = m->BadSpeculationSlots[core_id] += uint64((double(cBadSpeculationSlots) / total) * double(slots)); - cBackendBoundSlots = m->BackendBoundSlots[core_id] += uint64((double(cBackendBoundSlots) / total) * double(slots)); - cRetiringSlots = m->RetiringSlots[core_id] += uint64((double(cRetiringSlots) / total) * double(slots)); + cFrontendBoundSlots = m->FrontendBoundSlots[core_id] += (total != 0) ? uint64((double(cFrontendBoundSlots) / total) * double(slots)) : 0; + cBadSpeculationSlots = m->BadSpeculationSlots[core_id] += (total != 0) ? uint64((double(cBadSpeculationSlots) / total) * double(slots)) : 0; + cBackendBoundSlots = m->BackendBoundSlots[core_id] += (total != 0) ? uint64((double(cBackendBoundSlots) / total) * double(slots)) : 0; + cRetiringSlots = m->RetiringSlots[core_id] += (total != 0) ? uint64((double(cRetiringSlots) / total) * double(slots)) : 0; if (m->isHWTMAL2Supported()) { - cMemBoundSlots = m->MemBoundSlots[core_id] += uint64((double(cMemBoundSlots) / total) * double(slots)); - cFetchLatSlots = m->FetchLatSlots[core_id] += uint64((double(cFetchLatSlots) / total) * double(slots)); - cBrMispredSlots = m->BrMispredSlots[core_id] += uint64((double(cBrMispredSlots) / total) * double(slots)); - cHeavyOpsSlots = m->HeavyOpsSlots[core_id] += uint64((double(cHeavyOpsSlots) / total) * double(slots)); + cMemBoundSlots = m->MemBoundSlots[core_id] += (total != 0) ? uint64((double(cMemBoundSlots) / total) * double(slots)) : 0; + cFetchLatSlots = m->FetchLatSlots[core_id] += (total != 0) ? uint64((double(cFetchLatSlots) / total) * double(slots)) : 0; + cBrMispredSlots = m->BrMispredSlots[core_id] += (total != 0) ? uint64((double(cBrMispredSlots) / total) * double(slots)) : 0; + cHeavyOpsSlots = m->HeavyOpsSlots[core_id] += (total != 0) ? uint64((double(cHeavyOpsSlots) / total) * double(slots)) : 0; } } cAllSlotsRaw = m->AllSlotsRaw[core_id] += slots; - DBG(3, slots , " " , cFrontendBoundSlots , " " , cBadSpeculationSlots , " " , cBackendBoundSlots , " " , cRetiringSlots); + DBG(3, "HWTMAL1: ", slots , " " , cFrontendBoundSlots , " " , cBadSpeculationSlots , " " , cBackendBoundSlots , " " , cRetiringSlots); msr->unlock(); } } From 7f2ccb21c30207c989d605215c9b019c4bb09b61 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 3 Dec 2025 14:15:01 +0100 Subject: [PATCH 3/8] fix core_id computation when coreMaskWidth = 0 Change-Id: I3181be791f517a15e4ca0a937b0ca4675419ea9f --- src/topologyentry.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/topologyentry.h b/src/topologyentry.h index ff92f572..8a7cb6e9 100644 --- a/src/topologyentry.h +++ b/src/topologyentry.h @@ -118,7 +118,7 @@ inline void fillEntry(TopologyEntry & entry, const uint32 & smtMaskWidth, const { DBG(1, "entry.os_id = ", entry.os_id, " apic_id = ", apic_id); entry.thread_id = smtMaskWidth ? extract_bits_32(apic_id, 0, smtMaskWidth - 1) : 0; - entry.core_id = (smtMaskWidth + coreMaskWidth) ? extract_bits_32(apic_id, smtMaskWidth, smtMaskWidth + coreMaskWidth - 1) : 0; + entry.core_id = coreMaskWidth ? extract_bits_32(apic_id, smtMaskWidth, smtMaskWidth + coreMaskWidth - 1) : 0; entry.socket_id = extract_bits_32(apic_id, smtMaskWidth + coreMaskWidth, 31); entry.tile_id = extract_bits_32(apic_id, l2CacheMaskShift, 31); entry.socket_unique_core_id = entry.core_id; From 8fdf97bc9636792e05e4be82421efc1c9aec980f Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Thu, 4 Dec 2025 08:48:30 +0100 Subject: [PATCH 4/8] rename default mode into optimized power mode Change-Id: I6a735674deece7c2f8ad42a79ba82a889e24abc2 --- doc/LATENCY-OPTIMIZED-MODE.md | 12 ++++++------ scripts/bhs-power-mode.ps1 | 6 +++--- scripts/bhs-power-mode.sh | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/doc/LATENCY-OPTIMIZED-MODE.md b/doc/LATENCY-OPTIMIZED-MODE.md index 6cf74314..a894d38b 100644 --- a/doc/LATENCY-OPTIMIZED-MODE.md +++ b/doc/LATENCY-OPTIMIZED-MODE.md @@ -8,16 +8,16 @@ The hardware monitors the average CPU utilization across all cores at regular in The screenshot above presents real-time data on uncore frequency statistics, measured in GHz, from a dual-socket platform (represented by two rows). Each socket includes five dies (organized into five columns). The first three dies contain CORes (COR), Last Level Cache (LLC), and Memory controllers (M), collectively referred to as CORLLCM. The final two dies are IO dies. -The ELC control has parameters that can be adjusted either through BIOS or software tools. The default parameter configuration is optimized for performance per watt, ensuring power efficiency. The alternative configuration, known as Latency Optimized Mode, prioritizes maximum performance. +The ELC control has parameters that can be adjusted either through BIOS or software tools. The default parameter configuration can be optimized for performance per watt, ensuring power efficiency. The alternative configuration, known as Latency Optimized Mode, prioritizes maximum performance. Below are the PCM statistics from a system operating in Latency Optimized Mode: ![Uncore Frequency Statistics Latency Optimized Mode](https://github.com/user-attachments/assets/70310bbc-725b-4450-af7a-1db2c04291dd) ## BIOS Options for Latency Optimized Mode -The BIOS option for selecting the Default or Latency Optimized Mode can typically be located in the following menus, depending on the BIOS version and OEM vendor: +The BIOS option for selecting the Optimized Power Mode or Latency Optimized Mode can typically be located in the following menus, depending on the BIOS version and OEM vendor: - **Socket Configuration** -> **Advanced Power Management** -> **CPU – Advanced PM Tuning** -> **Latency Optimized Mode** (Disabled or Enabled) -- **System Utilities** -> **System Configuration** -> **BIOS/Platform Configuration (RBSU)** -> **Power and Performance Options** -> **Advanced Power Options** -> **Efficiency Latency Control** (Default or Latency Optimized mode) +- **System Utilities** -> **System Configuration** -> **BIOS/Platform Configuration (RBSU)** -> **Power and Performance Options** -> **Advanced Power Options** -> **Efficiency Latency Control** (Default (Optimized Power Mode) or Latency Optimized mode) Should this BIOS option be unavailable or if there is a preference to change the mode during runtime, the PCM repository provides scripts for changing this mode. @@ -46,16 +46,16 @@ Windows: .\bhs-power-mode.ps1 --latency-optimized-mode ``` -### Restoring the Default Mode +### Setting Optimized Power Mode Linux/FreeBSD/UNIX: ``` -bash bhs-power-mode.sh --default +bash bhs-power-mode.sh --optimized-power-mode ``` Windows: ``` -.\bhs-power-mode.ps1 --default +.\bhs-power-mode.ps1 --optimized-power-mode ``` diff --git a/scripts/bhs-power-mode.ps1 b/scripts/bhs-power-mode.ps1 index 9ebb4a5f..dd360f50 100644 --- a/scripts/bhs-power-mode.ps1 +++ b/scripts/bhs-power-mode.ps1 @@ -3,7 +3,7 @@ Write-Output "Birch Stream Power Mode Utility" Write-Output "" Write-Output " Options:" -Write-Output " --default : set default power mode" +Write-Output " --optimized-power-mode : set optimized power mode" Write-Output " --latency-optimized-mode : set latency optimized mode" Write-Output "" @@ -29,8 +29,8 @@ $output -split "`n" | ForEach-Object { } } -if ($args[0] -eq "--default") { - Write-Output "Setting default mode..." +if ($args[0] -eq "--optimized-power-mode") { + Write-Output "Setting optimized power mode..." foreach ($die in $io_dies) { # EFFICIENCY_LATENCY_CTRL_RATIO (Uncore IO) diff --git a/scripts/bhs-power-mode.sh b/scripts/bhs-power-mode.sh index c842d1f8..4935f1c3 100644 --- a/scripts/bhs-power-mode.sh +++ b/scripts/bhs-power-mode.sh @@ -5,7 +5,7 @@ echo "Birch Stream Power Mode Utility" echo "" echo " Options:" -echo " --default : set default power mode" +echo " --optimized-power-mode : set optimized power mode" echo " --latency-optimized-mode : set latency optimized mode" echo @@ -29,8 +29,8 @@ while read -r line; do fi done <<< "$output" -if [ "$1" == "--default" ]; then - echo "Setting default mode..." +if [ "$1" == "--optimized-power-mode" ]; then + echo "Setting optimized power mode..." for die in "${io_dies[@]}"; do # EFFICIENCY_LATENCY_CTRL_RATIO (Uncore IO) From 03cc68ca7047e85cc9dd5970e3ef013068fe1741 Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Thu, 4 Dec 2025 10:03:56 +0100 Subject: [PATCH 5/8] Update doc/LATENCY-OPTIMIZED-MODE.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- doc/LATENCY-OPTIMIZED-MODE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/LATENCY-OPTIMIZED-MODE.md b/doc/LATENCY-OPTIMIZED-MODE.md index a894d38b..e7c629a2 100644 --- a/doc/LATENCY-OPTIMIZED-MODE.md +++ b/doc/LATENCY-OPTIMIZED-MODE.md @@ -17,7 +17,7 @@ Below are the PCM statistics from a system operating in Latency Optimized Mode: The BIOS option for selecting the Optimized Power Mode or Latency Optimized Mode can typically be located in the following menus, depending on the BIOS version and OEM vendor: - **Socket Configuration** -> **Advanced Power Management** -> **CPU – Advanced PM Tuning** -> **Latency Optimized Mode** (Disabled or Enabled) -- **System Utilities** -> **System Configuration** -> **BIOS/Platform Configuration (RBSU)** -> **Power and Performance Options** -> **Advanced Power Options** -> **Efficiency Latency Control** (Default (Optimized Power Mode) or Latency Optimized mode) +- **System Utilities** -> **System Configuration** -> **BIOS/Platform Configuration (RBSU)** -> **Power and Performance Options** -> **Advanced Power Options** -> **Efficiency Latency Control** (Default (Optimized Power Mode) or Latency Optimized Mode) Should this BIOS option be unavailable or if there is a preference to change the mode during runtime, the PCM repository provides scripts for changing this mode. From 077e818f61118890e53ce6cd4873ddd4e483f20e Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 10 Dec 2025 13:43:20 +0100 Subject: [PATCH 6/8] macOS-13 is deprecated, use macos-15-intel instead Change-Id: I607d2266e80fab8a63892fca04eb4d74fe116baf --- .github/workflows/macos-scan-build.yml | 2 +- .github/workflows/macosx_build.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/macos-scan-build.yml b/.github/workflows/macos-scan-build.yml index 8420529a..b2a130ac 100644 --- a/.github/workflows/macos-scan-build.yml +++ b/.github/workflows/macos-scan-build.yml @@ -12,7 +12,7 @@ permissions: jobs: build: - runs-on: macOS-13 + runs-on: macos-15-intel steps: - name: Harden Runner diff --git a/.github/workflows/macosx_build.yml b/.github/workflows/macosx_build.yml index 937f3ec8..dca42766 100644 --- a/.github/workflows/macosx_build.yml +++ b/.github/workflows/macosx_build.yml @@ -12,7 +12,7 @@ permissions: jobs: build: - runs-on: macOS-13 + runs-on: macos-15-intel steps: - name: Harden Runner From 5a92e40fdf971280370ba5f5964441a3cb6b2a48 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 10 Dec 2025 13:46:11 +0100 Subject: [PATCH 7/8] do not select old Xcode 14.3.1 Change-Id: I9b7ac63504a3c081a064a7b985ea7374a8a6d4f6 --- .github/workflows/macos-scan-build.yml | 3 --- .github/workflows/macosx_build.yml | 3 --- 2 files changed, 6 deletions(-) diff --git a/.github/workflows/macos-scan-build.yml b/.github/workflows/macos-scan-build.yml index b2a130ac..863fecd3 100644 --- a/.github/workflows/macos-scan-build.yml +++ b/.github/workflows/macos-scan-build.yml @@ -20,9 +20,6 @@ jobs: with: egress-policy: audit - - name: Select Xcode 14.3.1 - run: sudo xcode-select -s /Applications/Xcode_14.3.1.app/Contents/Developer - - name: Set SDKROOT and verify kernel headers shell: bash run: | diff --git a/.github/workflows/macosx_build.yml b/.github/workflows/macosx_build.yml index dca42766..fe290ef6 100644 --- a/.github/workflows/macosx_build.yml +++ b/.github/workflows/macosx_build.yml @@ -20,9 +20,6 @@ jobs: with: egress-policy: audit - - name: Select Xcode 14.3.1 - run: sudo xcode-select -s /Applications/Xcode_14.3.1.app/Contents/Developer - - name: Set SDKROOT and verify kernel headers shell: bash run: | From 66ec89ea782f6de496290c9910ac89580931cca7 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Thu, 11 Dec 2025 14:21:45 +0100 Subject: [PATCH 8/8] install llvm 15 Change-Id: I56afba39419063b8c420aac85e61c8c2414c5ac5 --- .github/workflows/macos-scan-build.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/macos-scan-build.yml b/.github/workflows/macos-scan-build.yml index 863fecd3..565a7d86 100644 --- a/.github/workflows/macos-scan-build.yml +++ b/.github/workflows/macos-scan-build.yml @@ -30,6 +30,10 @@ jobs: exit 1 } + - name: install llvm 15 + run: | + brew install llvm@15 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: recursive