diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index a285689c..f5817da5 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -1140,6 +1140,7 @@ bool PCM::discoverSystemTopology() pcm_cpuid(0x1F, subleaf, cpuid_args); domain d; d.type = (TopologyEntry::DomainTypeID)extract_bits_32(cpuid_args.reg.ecx, 8, 15); + DBG(1 , "pcm_cpuid 0x1F cpuid_args.reg.ecx = " , cpuid_args.reg.ecx , " d.type = ", d.type); if (d.type == TopologyEntry::DomainTypeID::InvalidDomainTypeID) { break; @@ -1482,7 +1483,8 @@ bool PCM::discoverSystemTopology() // use map to change apic socket id to the logical socket id for (int i = 0; (i < (int)num_cores) && (!socketIdMap.empty()); ++i) { - DBG(2, "socket_id: ", topology[i].socket_id, ", socketIdMap tells me: ", socketIdMap[topology[i].socket_id]); + DBG(2, "socket_id: ", topology[i].socket_id, ", socketIdMap tells me: ", + (socketIdMap.find(topology[i].socket_id) == socketIdMap.end()) ? (std::string("N/A")): std::to_string(socketIdMap[topology[i].socket_id])); if(isCoreOnline((int32)i)) topology[i].socket_id = socketIdMap[topology[i].socket_id]; } @@ -4413,6 +4415,7 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */, MSR[i]->write(IA32_PERF_GLOBAL_OVF_CTRL, value); MSR[i]->write(IA32_CR_PERF_GLOBAL_CTRL, value); + DBG(3, "core_id = ", i, " wrote IA32_PERF_GLOBAL_OVF_CTRL and IA32_CR_PERF_GLOBAL_CTRL = 0x", std::hex, value, std::dec); } #ifdef PCM_USE_PERF else @@ -5679,7 +5682,7 @@ void BasicCounterState::readAndAggregate(std::shared_ptr msr) { { msr->read(IA32_PERF_GLOBAL_STATUS, &overflows); // read overflows - DBG(3, "Debug " , core_id , " IA32_PERF_GLOBAL_STATUS: " , overflows); + DBG(3, "core_id = " , core_id , " IA32_PERF_GLOBAL_STATUS: " , overflows); msr->read(INST_RETIRED_ADDR, &cInstRetiredAny); msr->read(CPU_CLK_UNHALTED_THREAD_ADDR, &cCpuClkUnhaltedThread); @@ -5698,6 +5701,7 @@ void BasicCounterState::readAndAggregate(std::shared_ptr msr) msr->lock(); msr->read(PERF_METRICS_ADDR, &perfMetrics); msr->read(TOPDOWN_SLOTS_ADDR, &slots); + DBG(3, "core_id = " , core_id , " PERF_METRICS = ", perfMetrics, " TOPDOWN_SLOTS = ", slots); msr->write(PERF_METRICS_ADDR, 0); msr->write(TOPDOWN_SLOTS_ADDR, 0); cFrontendBoundSlots = extract_bits(perfMetrics, 16, 23); @@ -5712,22 +5716,22 @@ void BasicCounterState::readAndAggregate(std::shared_ptr msr) cHeavyOpsSlots = extract_bits(perfMetrics, 32 + 0*8, 32 + 0*8 + 7); } const double total = double(cFrontendBoundSlots + cBadSpeculationSlots + cBackendBoundSlots + cRetiringSlots); - if (total != 0) + if (true) { - cFrontendBoundSlots = m->FrontendBoundSlots[core_id] += uint64((double(cFrontendBoundSlots) / total) * double(slots)); - cBadSpeculationSlots = m->BadSpeculationSlots[core_id] += uint64((double(cBadSpeculationSlots) / total) * double(slots)); - cBackendBoundSlots = m->BackendBoundSlots[core_id] += uint64((double(cBackendBoundSlots) / total) * double(slots)); - cRetiringSlots = m->RetiringSlots[core_id] += uint64((double(cRetiringSlots) / total) * double(slots)); + cFrontendBoundSlots = m->FrontendBoundSlots[core_id] += (total != 0) ? uint64((double(cFrontendBoundSlots) / total) * double(slots)) : 0; + cBadSpeculationSlots = m->BadSpeculationSlots[core_id] += (total != 0) ? uint64((double(cBadSpeculationSlots) / total) * double(slots)) : 0; + cBackendBoundSlots = m->BackendBoundSlots[core_id] += (total != 0) ? uint64((double(cBackendBoundSlots) / total) * double(slots)) : 0; + cRetiringSlots = m->RetiringSlots[core_id] += (total != 0) ? uint64((double(cRetiringSlots) / total) * double(slots)) : 0; if (m->isHWTMAL2Supported()) { - cMemBoundSlots = m->MemBoundSlots[core_id] += uint64((double(cMemBoundSlots) / total) * double(slots)); - cFetchLatSlots = m->FetchLatSlots[core_id] += uint64((double(cFetchLatSlots) / total) * double(slots)); - cBrMispredSlots = m->BrMispredSlots[core_id] += uint64((double(cBrMispredSlots) / total) * double(slots)); - cHeavyOpsSlots = m->HeavyOpsSlots[core_id] += uint64((double(cHeavyOpsSlots) / total) * double(slots)); + cMemBoundSlots = m->MemBoundSlots[core_id] += (total != 0) ? uint64((double(cMemBoundSlots) / total) * double(slots)) : 0; + cFetchLatSlots = m->FetchLatSlots[core_id] += (total != 0) ? uint64((double(cFetchLatSlots) / total) * double(slots)) : 0; + cBrMispredSlots = m->BrMispredSlots[core_id] += (total != 0) ? uint64((double(cBrMispredSlots) / total) * double(slots)) : 0; + cHeavyOpsSlots = m->HeavyOpsSlots[core_id] += (total != 0) ? uint64((double(cHeavyOpsSlots) / total) * double(slots)) : 0; } } cAllSlotsRaw = m->AllSlotsRaw[core_id] += slots; - DBG(3, slots , " " , cFrontendBoundSlots , " " , cBadSpeculationSlots , " " , cBackendBoundSlots , " " , cRetiringSlots); + DBG(3, "HWTMAL1: ", slots , " " , cFrontendBoundSlots , " " , cBadSpeculationSlots , " " , cBackendBoundSlots , " " , cRetiringSlots); msr->unlock(); } } diff --git a/src/msr.cpp b/src/msr.cpp index 635ee46f..cdbfc433 100644 --- a/src/msr.cpp +++ b/src/msr.cpp @@ -258,13 +258,17 @@ int32 MsrHandle::write(uint64 msr_number, uint64 value) std::cout << "DEBUG: writing MSR 0x" << std::hex << msr_number << " value 0x" << value << " on cpu " << std::dec << cpu_id << std::endl; #endif if (fd < 0) return 0; + DBG(4, "core_id = ", cpu_id, " writing MSR 0x", std::hex, msr_number, " value 0x", value, std::dec); return ::pwrite(fd, (const void *)&value, sizeof(uint64), msr_number); } int32 MsrHandle::read(uint64 msr_number, uint64 * value) { if (fd < 0) return 0; - return ::pread(fd, (void *)value, sizeof(uint64), msr_number); + assert(value); + const auto ret = ::pread(fd, (void *)value, sizeof(uint64), msr_number); + DBG(4, "core_id = ", cpu_id, " reading MSR 0x", std::hex, msr_number, " value 0x", *value, std::dec); + return ret; } #endif diff --git a/src/topologyentry.h b/src/topologyentry.h index 4c94d4ca..a0c0c661 100644 --- a/src/topologyentry.h +++ b/src/topologyentry.h @@ -116,11 +116,17 @@ struct PCM_API TopologyEntry // describes a core inline void fillEntry(TopologyEntry & entry, const uint32 & smtMaskWidth, const uint32 & coreMaskWidth, const uint32 & l2CacheMaskShift, const int apic_id) { + #ifndef USER_KERNEL_SHARED + DBG(1, "entry.os_id = ", entry.os_id, " apic_id = ", apic_id); + #endif entry.thread_id = smtMaskWidth ? extract_bits_32(apic_id, 0, smtMaskWidth - 1) : 0; - entry.core_id = (smtMaskWidth + coreMaskWidth) ? extract_bits_32(apic_id, smtMaskWidth, smtMaskWidth + coreMaskWidth - 1) : 0; + entry.core_id = coreMaskWidth ? extract_bits_32(apic_id, smtMaskWidth, smtMaskWidth + coreMaskWidth - 1) : 0; entry.socket_id = extract_bits_32(apic_id, smtMaskWidth + coreMaskWidth, 31); entry.tile_id = extract_bits_32(apic_id, l2CacheMaskShift, 31); entry.socket_unique_core_id = entry.core_id; + #ifndef USER_KERNEL_SHARED + DBG(1, "entry.os_id = ", entry.os_id, " apic_id = ", apic_id, " entry.thread_id = ", entry.thread_id, " entry.core_id = ", entry.core_id, " entry.socket_id = ", entry.socket_id , " entry.tile_id = ", entry.tile_id, " entry.socket_unique_core_id = ", entry.socket_unique_core_id); + #endif } inline bool initCoreMasks(uint32 & smtMaskWidth, uint32 & coreMaskWidth, uint32 & l2CacheMaskShift, uint32 & l3CacheMaskShift) @@ -143,6 +149,9 @@ inline bool initCoreMasks(uint32 & smtMaskWidth, uint32 & coreMaskWidth, uint32 } levelType = extract_bits_32(cpuid_args.array[2], 8, 15); levelShift = extract_bits_32(cpuid_args.array[0], 0, 4); + #ifndef USER_KERNEL_SHARED + DBG(1, "levelType = ", levelType, " levelShift = ", levelShift); + #endif switch (levelType) { case 1: //level type is SMT, so levelShift is the SMT_Mask_Width @@ -251,6 +260,9 @@ inline bool initCoreMasks(uint32 & smtMaskWidth, uint32 & coreMaskWidth, uint32 } #endif } + #ifndef USER_KERNEL_SHARED + DBG(1, "smtMaskWidth = ", smtMaskWidth, " coreMaskWidth = ", coreMaskWidth, " l2CacheMaskShift = ", l2CacheMaskShift, " l3CacheMaskShift = ", l3CacheMaskShift); + #endif return true; }