diff --git a/.gitignore b/.gitignore index fdf68e8c..86e6584c 100644 --- a/.gitignore +++ b/.gitignore @@ -32,4 +32,5 @@ latex/ .vs/ .idea/ build -src/simdjson \ No newline at end of file +src/simdjson +.vscode/ diff --git a/pcm.spec b/pcm.spec index dfc8ff51..b234980d 100644 --- a/pcm.spec +++ b/pcm.spec @@ -72,7 +72,6 @@ rm -rf $RPM_BUILD_ROOT %{_sbindir}/pcm-core %{_sbindir}/pcm-iio %{_sbindir}/pcm-latency -%{_sbindir}/pcm-lspci %{_sbindir}/pcm-memory %{_sbindir}/pcm-msr %{_sbindir}/pcm-mmio diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4cf1a82a..98c34651 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -4,7 +4,7 @@ include(FindOpenSSL) # All pcm-* executables -set(PROJECT_NAMES pcm pcm-numa pcm-latency pcm-power pcm-msr pcm-memory pcm-tsx pcm-pcie pcm-core pcm-iio pcm-lspci pcm-pcicfg pcm-mmio pcm-tpmi pcm-raw pcm-accel) +set(PROJECT_NAMES pcm pcm-numa pcm-latency pcm-power pcm-msr pcm-memory pcm-tsx pcm-pcie pcm-core pcm-iio pcm-pcicfg pcm-mmio pcm-tpmi pcm-raw pcm-accel) set(MINIMUM_OPENSSL_VERSION 1.1.1) diff --git a/src/MacMSRDriver/PcmMsr/PcmMsr.cpp b/src/MacMSRDriver/PcmMsr/PcmMsr.cpp index 2e9b3154..a603a93c 100644 --- a/src/MacMSRDriver/PcmMsr/PcmMsr.cpp +++ b/src/MacMSRDriver/PcmMsr/PcmMsr.cpp @@ -64,10 +64,13 @@ void cpuGetTopoData(void* pTopos){ uint32 smtMaskWidth = 0; uint32 coreMaskWidth = 0; uint32 l2CacheMaskShift = 0; - initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift); + uint32 l3CacheMaskShift = 0; + initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift, l3CacheMaskShift); PCM_CPUID_INFO cpuid_args; pcm_cpuid(0xb, 0x0, cpuid_args); - fillEntry(entry, smtMaskWidth, coreMaskWidth, l2CacheMaskShift, cpuid_args.array[3]); + const auto apic_id = cpuid_args.array[3]; + fillEntry(entry, smtMaskWidth, coreMaskWidth, l2CacheMaskShift, apic_id); + entry.l3_cache_id = extract_bits_32(apic_id, l3CacheMaskShift, 31); } OSDefineMetaClassAndStructors(com_intel_driver_PcmMsr, IOService) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 2e18f7b4..742dd303 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -1101,6 +1101,7 @@ bool PCM::discoverSystemTopology() uint32 smtMaskWidth = 0; uint32 coreMaskWidth = 0; uint32 l2CacheMaskShift = 0; + uint32 l3CacheMaskShift = 0; struct domain { @@ -1111,7 +1112,7 @@ bool PCM::discoverSystemTopology() { TemporalThreadAffinity aff0(0); - if (initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift) == false) + if (initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift, l3CacheMaskShift) == false) { std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11.\n"; return false; @@ -1151,20 +1152,18 @@ bool PCM::discoverSystemTopology() for (size_t l = 0; l < topologyDomains.size(); ++l) { topologyDomainMap[topologyDomains[l].type] = topologyDomains[l]; -#if 0 - std::cerr << "Topology level: " << l << - " type: " << topologyDomains[l].type << - " (" << TopologyEntry::getDomainTypeStr(topologyDomains[l].type) << ")" << - " width: " << topologyDomains[l].width << - " levelShift: " << topologyDomains[l].levelShift << - " nextLevelShift: " << topologyDomains[l].nextLevelShift << "\n"; -#endif + DBG(1 , "Topology level: " , l , + " type: " , topologyDomains[l].type , + " (" , TopologyEntry::getDomainTypeStr(topologyDomains[l].type) , ")" , + " width: " , topologyDomains[l].width , + " levelShift: " , topologyDomains[l].levelShift , + " nextLevelShift: " , topologyDomains[l].nextLevelShift); } } } #ifndef __APPLE__ - auto populateEntry = [&topologyDomainMap,&smtMaskWidth, &coreMaskWidth, &l2CacheMaskShift](TopologyEntry& entry) + auto populateEntry = [&topologyDomainMap,&smtMaskWidth, &coreMaskWidth, &l2CacheMaskShift, &l3CacheMaskShift](TopologyEntry& entry) { auto getAPICID = [&](const uint32 leaf) { @@ -1218,6 +1217,7 @@ bool PCM::discoverSystemTopology() { fillEntry(entry, smtMaskWidth, coreMaskWidth, l2CacheMaskShift, getAPICID(0xb)); } + entry.l3_cache_id = extract_bits_32(getAPICID(0xb), l3CacheMaskShift, 31); }; #endif @@ -3231,7 +3231,7 @@ void PCM::printDetailedSystemTopology(const int detailLevel) std::cerr << "Tile_Id "; if (detailLevel > 0) std::cerr << "Die_Id Die_Group_Id "; std::cerr << "Package_Id Core_Type Native_CPU_Model\n"; - std::map > os_id_by_core, os_id_by_tile, core_id_by_socket; + std::map > os_id_by_core, os_id_by_tile, core_id_by_socket, os_id_by_l3_cache; size_t counter = 0; for (auto it = topology.begin(); it != topology.end(); ++it) { @@ -3252,6 +3252,7 @@ void PCM::printDetailedSystemTopology(const int detailLevel) // add socket offset to distinguish cores and tiles from different sockets os_id_by_core[(it->socket_id << 15) + it->core_id].push_back(it->os_id); os_id_by_tile[(it->socket_id << 15) + it->tile_id].push_back(it->os_id); + os_id_by_l3_cache[(it->socket_id << 15) + it->l3_cache_id].push_back(it->os_id); ++counter; } @@ -3288,6 +3289,16 @@ void PCM::printDetailedSystemTopology(const int detailLevel) } std::cerr << ")"; } + std::cerr << "\nL3$ "; + for (auto core = os_id_by_l3_cache.begin(); core != os_id_by_l3_cache.end(); ++core) + { + auto os_id = core->second.begin(); + std::cerr << "(" << *os_id; + for (++os_id; os_id != core->second.end(); ++os_id) { + std::cerr << "," << *os_id; + } + std::cerr << ")"; + } std::cerr << "\n"; std::cerr << "\n"; } @@ -7560,14 +7571,23 @@ void PCM::getPCICFGPMUsFromDiscovery(const unsigned int BoxType, const size_t s, { std::vector > CounterControlRegs, CounterValueRegs; const auto n_regs = uncorePMUDiscovery->getBoxNumRegs(BoxType, s, pos); - auto makeRegister = [](const uint64 rawAddr) + auto makeRegister = [&pos, &numBoxes, &BoxType, &s](const uint64 rawAddr) { #ifndef PCI_ENABLE constexpr auto PCI_ENABLE = 0x80000000ULL; #endif UncorePMUDiscovery::PCICFGAddress Addr; Addr.raw = rawAddr; - assert(Addr.raw & PCI_ENABLE); + if ((Addr.raw & PCI_ENABLE) == 0) + { + std::cerr << "PCM Error: PCI_ENABLE bit not set in address 0x" << std::hex << Addr.raw << std::dec << "\n"; + std::cerr << "This is likely a bug in the uncore PMU discovery BIOS table. Contact your BIOS vendor.\n"; + std::cerr << "Socket: " << s << "\n"; + std::cerr << "Box type: " << BoxType << "\n"; + std::cerr << "Box position: " << pos << "/" << numBoxes << "\n"; + std::cerr << "Address: " << Addr.getStr() << "\n"; + return std::shared_ptr(); + } try { auto handle = std::make_shared(0, (uint32)Addr.fields.bus, (uint32)Addr.fields.device, diff --git a/src/lspci.cpp b/src/lspci.cpp index 848c357c..77ebc9d6 100644 --- a/src/lspci.cpp +++ b/src/lspci.cpp @@ -94,26 +94,6 @@ bool probe_pci(struct pci *p) return p->exist; } -void print_pci(struct pci p, const PCIDB & pciDB) -{ - printf("Parent bridge info:"); - printf("%x:%x.%d [%04x:%04x] %s %s %d P:%x S:%x S:%x ", - p.bdf.busno, p.bdf.devno, p.bdf.funcno, - p.vendor_id, p.device_id, - (pciDB.first.count(p.vendor_id) > 0)?pciDB.first.at(p.vendor_id).c_str():"unknown vendor", - (pciDB.second.count(p.vendor_id) > 0 && pciDB.second.at(p.vendor_id).count(p.device_id) > 0)?pciDB.second.at(p.vendor_id).at(p.device_id).c_str():"unknown device", - p.header_type, - p.primary_bus_number, p.secondary_bus_number, p.subordinate_bus_number); - printf("Device info:"); - printf("%x:%x.%d [%04x:%04x] %s %s %d Gen%d x%d\n", - p.bdf.busno, p.bdf.devno, p.bdf.funcno, - p.vendor_id, p.device_id, - (pciDB.first.count(p.vendor_id) > 0)?pciDB.first.at(p.vendor_id).c_str():"unknown vendor", - (pciDB.second.count(p.vendor_id) > 0 && pciDB.second.at(p.vendor_id).count(p.device_id) > 0)?pciDB.second.at(p.vendor_id).at(p.device_id).c_str():"unknown device", - p.header_type, - p.link_speed, p.link_width); -} - void load_PCIDB(PCIDB & pciDB) { std::ifstream in(PCI_IDS_PATH); diff --git a/src/lspci.h b/src/lspci.h index 23eda0d7..a49bd714 100644 --- a/src/lspci.h +++ b/src/lspci.h @@ -242,20 +242,6 @@ struct pci { bool isIntelDeviceById(uint16_t device_id) const { return (isIntelDevice() && (this->device_id == device_id)); } }; -struct iio_skx { - struct { - struct { - struct pci root_pci_dev; /* single device represent root port */ - std::vector child_pci_devs; /* Contain child switch and end-point devices */ - } parts[4]{}; /* part 0, 1, 2, 3 */ - uint8_t busno{}; /* holding busno for each IIO stack */ - std::string stack_name{}; - std::vector values{}; - bool flipped = false; - } stacks[6]; /* iio stack 0, 1, 2, 3, 4, 5 */ - uint32_t socket_id{}; -}; - struct iio_bifurcated_part { int part_id{0}; /* single device represent root port */ @@ -296,8 +282,6 @@ bool probe_pci(struct pci *p); */ typedef std::pair< std::map ,std::map< int, std::map > > PCIDB; -void print_pci(struct pci p, const PCIDB & pciDB); - void load_PCIDB(PCIDB & pciDB); } // namespace pcm diff --git a/src/pcm-iio-pmu.cpp b/src/pcm-iio-pmu.cpp index cd108fe7..e712fcc0 100644 --- a/src/pcm-iio-pmu.cpp +++ b/src/pcm-iio-pmu.cpp @@ -55,7 +55,7 @@ string build_pci_header(const PCIDB & pciDB, uint32_t column_width, const struct for (auto& part : p.parts_no) { s += std::to_string(part) + ", "; } - s += "\b\b "; + s.erase(s.size() - 2); } /* row with data */ diff --git a/src/pcm-lspci.cpp b/src/pcm-lspci.cpp deleted file mode 100644 index e552a995..00000000 --- a/src/pcm-lspci.cpp +++ /dev/null @@ -1,135 +0,0 @@ -// SPDX-License-Identifier: BSD-3-Clause -// Copyright (c) 2017-2022, Intel Corporation - -// written by Patrick Lu -#include "cpucounters.h" -#ifdef _MSC_VER -#include -#include "windows/windriver.h" -#else -#include -#endif -#include -#include -#include -#ifdef _MSC_VER -#include "freegetopt/getopt.h" -#endif - -#include "lspci.h" -using namespace std; -using namespace pcm; - -void scanBus(int bus, const PCIDB & pciDB) -{ - if(!PciHandleType::exists(0, bus, 8, 2)) return; - - std::cout << "BUS 0x" << std::hex << bus << std::dec << "\n"; - - struct iio_skx iio_skx; - - PciHandleType h(0, bus, 8, 2); - uint32 cpubusno = 0; - - h.read32(0xcc, &cpubusno); // CPUBUSNO register - iio_skx.stacks[0].busno = cpubusno & 0xff; - iio_skx.stacks[1].busno = (cpubusno >> 8) & 0xff; - iio_skx.stacks[2].busno = (cpubusno >> 16) & 0xff; - iio_skx.stacks[3].busno = (cpubusno >> 24) & 0xff; - h.read32(0xd0, &cpubusno); // CPUBUSNO1 register - iio_skx.stacks[4].busno = cpubusno & 0xff; - iio_skx.stacks[5].busno = (cpubusno >> 8) & 0xff; - - for (uint8_t stack = 0; stack < 6; stack++) { - uint8_t busno = iio_skx.stacks[stack].busno; - std::cout << "stack" << unsigned(stack) << std::hex << ":0x" << unsigned(busno) << std::dec << ",(" << unsigned(busno) << ")\n"; - for (uint8_t part = 0; part < 3; part++) { - struct pci *pci = &iio_skx.stacks[stack].parts[part].root_pci_dev; - struct bdf *bdf = &pci->bdf; - bdf->busno = busno; - bdf->devno = part; - bdf->funcno = 0; - /* This is a workaround to catch some IIO stack does not exist */ - if (stack != 0 && busno == 0) - pci->exist = false; - else - (void)probe_pci(pci); - } - } - for (uint8_t stack = 0; stack < 6; stack++) { - for (uint8_t part = 0; part < 4; part++) { - struct pci p = iio_skx.stacks[stack].parts[part].root_pci_dev; - if (!p.exist) - continue; - for (uint8_t b = p.secondary_bus_number; b <= p.subordinate_bus_number; b++) { /* FIXME: for 0:0.0, we may need to scan from secondary switch down; lgtm [cpp/fixme-comment] */ - for (uint8_t d = 0; d < 32; d++) { - for (uint8_t f = 0; f < 8; f++) { - struct pci pci; - pci.exist = false; - pci.bdf.busno = b; - pci.bdf.devno = d; - pci.bdf.funcno = f; - if (probe_pci(&pci)) - iio_skx.stacks[stack].parts[part].child_pci_devs.push_back(pci); - } - } - } - } - } - - for (uint8_t stack = 1; stack < 6; stack++) { /* XXX: Maybe there is no point to display all built-in devices on DMI/CBDMA stacks, if so, change stack = 1 */ - for (uint8_t part = 0; part < 4; part++) { - vector v = iio_skx.stacks[stack].parts[part].child_pci_devs; - struct pci pp = iio_skx.stacks[stack].parts[part].root_pci_dev; - if (pp.exist) - print_pci(pp, pciDB); - for (vector::const_iterator iunit = v.begin(); iunit != v.end(); ++iunit) { - struct pci p = *iunit; - if (p.exist) - print_pci(p, pciDB); - } - } - } -} - - -PCM_MAIN_NOTHROW; - -int mainThrows(int /*argc*/, char * /*argv*/[]) -{ - PCIDB pciDB; - load_PCIDB(pciDB); - PCM * m = PCM::getInstance(); - - if (!m->isSkxCompatible()) - { - cerr << "PCI tree display is currently not supported for processor family/model 0x" << std::hex << m->getCPUFamilyModel() << std::dec << "\n"; - } - else - { - std::cout << "\n Display PCI tree information\n\n"; - for (int bus = 0; bus < 256; ++bus) - scanBus(bus, pciDB); - } - - cerr << "Scanning all devices in group 0\n"; - for (uint32 bus = 0; bus < 256; ++bus) - { - for (uint32 device = 0; device < 32; ++device) - { - for (uint32 function = 0; function < 8; ++function) - { - if (PciHandleType::exists(0, bus, device, function)) - { - PciHandleType h(0, bus, device, function); - uint32 value = 0; - h.read32(0, &value); - const uint32 vendor = extract_bits_32(value, 0, 15); - const uint32 deviceID = extract_bits_32(value, 16, 31); - std::cout << "0:" << bus << ":" << device << ":" << function << " vendor 0x" << std::hex << vendor << " device 0x" << deviceID << std::dec << "\n"; - } - } - } - } - return 0; -} diff --git a/src/topologyentry.h b/src/topologyentry.h index ffbcdf67..1e7094b9 100644 --- a/src/topologyentry.h +++ b/src/topologyentry.h @@ -4,6 +4,9 @@ #pragma once #include "types.h" +#ifndef USER_KERNEL_SHARED +#include "debug.h" +#endif namespace pcm { @@ -25,6 +28,7 @@ struct PCM_API TopologyEntry // describes a core int32 die_grp_id; int32 socket_id; int32 socket_unique_core_id; + int32 l3_cache_id = -1; int32 native_cpu_model = -1; enum DomainTypeID { @@ -103,7 +107,7 @@ inline void fillEntry(TopologyEntry & entry, const uint32 & smtMaskWidth, const entry.socket_unique_core_id = entry.core_id; } -inline bool initCoreMasks(uint32 & smtMaskWidth, uint32 & coreMaskWidth, uint32 & l2CacheMaskShift) +inline bool initCoreMasks(uint32 & smtMaskWidth, uint32 & coreMaskWidth, uint32 & l2CacheMaskShift, uint32 & l3CacheMaskShift) { // init constants for CPU topology leaf 0xB // adapted from Topology Enumeration Reference code for Intel 64 Architecture @@ -154,24 +158,82 @@ inline bool initCoreMasks(uint32 & smtMaskWidth, uint32 & coreMaskWidth, uint32 (void) coreMaskWidth; // to suppress warnings on MacOS (unused vars) - #ifdef PCM_DEBUG_TOPOLOGY - uint32 threadsSharingL2; - #endif - uint32 l2CacheMaskWidth; + uint32 threadsSharingL2 = 0; + uint32 l2CacheMaskWidth = 0; pcm_cpuid(0x4, 2, cpuid_args); // get ID for L2 cache l2CacheMaskWidth = 1 + extract_bits_32(cpuid_args.array[0],14,25); // number of APIC IDs sharing L2 cache - #ifdef PCM_DEBUG_TOPOLOGY threadsSharingL2 = l2CacheMaskWidth; - #endif for( ; l2CacheMaskWidth > 1; l2CacheMaskWidth >>= 1) { l2CacheMaskShift++; } - #ifdef PCM_DEBUG_TOPOLOGY - std::cerr << "DEBUG: Number of threads sharing L2 cache = " << threadsSharingL2 - << " [the most significant bit = " << l2CacheMaskShift << "]\n"; - #endif + +#ifndef USER_KERNEL_SHARED + DBG(1, "Number of threads sharing L2 cache = " , threadsSharingL2, " [the most significant bit = " , l2CacheMaskShift , "]"); +#endif + + uint32 threadsSharingL3 = 0; + uint32 l3CacheMaskWidth = 0; + + pcm_cpuid(0x4, 3, cpuid_args); // get ID for L3 cache + l3CacheMaskWidth = 1 + extract_bits_32(cpuid_args.array[0], 14, 25); // number of APIC IDs sharing L3 cache + threadsSharingL3 = l3CacheMaskWidth; + for( ; l3CacheMaskWidth > 1; l3CacheMaskWidth >>= 1) + { + l3CacheMaskShift++; + } + +#ifndef USER_KERNEL_SHARED + DBG(1, "Number of threads sharing L3 cache = " , threadsSharingL3, " [the most significant bit = " , l3CacheMaskShift , "]"); +#endif + + (void) threadsSharingL2; // to suppress warnings on MacOS (unused vars) + (void) threadsSharingL3; // to suppress warnings on MacOS (unused vars) + + // Validate l3CacheMaskShift and ensure the bit range is correct + if (l3CacheMaskShift > 31) + { +#ifndef USER_KERNEL_SHARED + DBG(0, "Invalid bit range for L3 cache ID extraction = ", l3CacheMaskShift); +#endif + return false; + } + +#ifndef USER_KERNEL_SHARED + uint32 it = 0; + + for (int i = 0; i < 100; ++i) + { + uint32 threadsSharingCache = 0; + uint32 CacheMaskWidth = 0; + uint32 CacheMaskShift = 0; + pcm_cpuid(0x4, it, cpuid_args); + const auto cacheType = extract_bits_32(cpuid_args.array[0], 0, 4); + if (cacheType == 0) + { + break; // no more caches + } + const char * cacheTypeStr = nullptr; + switch (cacheType) + { + case 1: cacheTypeStr = "data"; break; + case 2: cacheTypeStr = "instruction"; break; + case 3: cacheTypeStr = "unified"; break; + default: cacheTypeStr = "unknown"; break; + } + const auto level = extract_bits_32(cpuid_args.array[0], 5, 7); + CacheMaskWidth = 1 + extract_bits_32(cpuid_args.array[0], 14, 25); // number of APIC IDs sharing cache + threadsSharingCache = CacheMaskWidth; + for( ; CacheMaskWidth > 1; CacheMaskWidth >>= 1) + { + CacheMaskShift++; + } + DBG(1, "Max number of threads sharing L" , level , " " , cacheTypeStr , " cache = " , threadsSharingCache, " [the most significant bit = " , CacheMaskShift , "]", + " shift = " , CacheMaskShift); + ++it; + } +#endif } return true; } diff --git a/src/types.h b/src/types.h index cbfb6797..6fb1f296 100644 --- a/src/types.h +++ b/src/types.h @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include #include diff --git a/src/utils.cpp b/src/utils.cpp index 02b51ec7..47bb7ffb 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -1342,6 +1342,16 @@ bool get_cpu_bus(uint32 msmDomain, uint32 msmBus, uint32 msmDev, uint32 msmFunc, return false; } + if (!cpuBusValid) + { + /** + * Return true because an unexpected device might appear in the OS due to specific configurations. + * This ensures the function does not fail in such cases and allows further processing. + */ + std::cerr << "CPUBUSNO_VALID is 0" << std::endl; + return true; + } + cpuBusNo.resize(8); for (int i = 0; i < 4; ++i) { h.read32(SPR_MSM_REG_CPUBUSNO0_OFFSET + i * 4, &cpuBusNo[i]); diff --git a/src/windows/pcm-lspci-win.cpp b/src/windows/pcm-lspci-win.cpp deleted file mode 100644 index b09a3cd0..00000000 --- a/src/windows/pcm-lspci-win.cpp +++ /dev/null @@ -1,6 +0,0 @@ -// pcm-lspci-win.cpp : Defines the entry point for the console application. -// - -#include "stdafx.h" - -#include "../pcm-lspci.cpp"