diff --git a/cabal.project b/cabal.project index 20d0e9a..9311bb4 100644 --- a/cabal.project +++ b/cabal.project @@ -15,6 +15,7 @@ index-state: packages: trace-dispatcher + hermod-trace-resources hermod-recon-framework program-options diff --git a/hermod-trace-resources/CHANGELOG.md b/hermod-trace-resources/CHANGELOG.md new file mode 100644 index 0000000..2ba496f --- /dev/null +++ b/hermod-trace-resources/CHANGELOG.md @@ -0,0 +1,9 @@ +# Changelog for hermod-trace-resources + +## 1.0.0 + +- Initial release as `hermod-trace-resources` + (moved from `cardano-node/trace-resources`, renamed from `trace-resources`). +- Module namespace: `Hermod.Tracing.Resources.*`. +- Single public module: `Hermod.Tracing.Resources`; platform-specific + implementation modules are internal. diff --git a/hermod-trace-resources/LICENSE b/hermod-trace-resources/LICENSE new file mode 100644 index 0000000..f433b1a --- /dev/null +++ b/hermod-trace-resources/LICENSE @@ -0,0 +1,177 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS diff --git a/hermod-trace-resources/NOTICE b/hermod-trace-resources/NOTICE new file mode 100644 index 0000000..2595781 --- /dev/null +++ b/hermod-trace-resources/NOTICE @@ -0,0 +1,13 @@ +Copyright 2021-2023 Input Output Global Inc (IOG), 2023-2026 Intersect. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/hermod-trace-resources/README.md b/hermod-trace-resources/README.md new file mode 100644 index 0000000..cc354d6 --- /dev/null +++ b/hermod-trace-resources/README.md @@ -0,0 +1,95 @@ +# hermod-trace-resources + +OS-level and RTS resource sampling for the +[Hermod tracing system](https://github.com/IntersectMBO/hermod-tracing). + +## What it does + +`hermod-trace-resources` exposes a single IO action: + +```haskell +readResourceStats :: IO (Maybe ResourceStats) +``` + +Call it once per sampling interval to get a snapshot of the current process's +resource usage. The returned `ResourceStats` value carries `LogFormatting` and +`MetaTrace` instances from `trace-dispatcher`, so it can be fed directly into any +`Trace IO ResourceStats`. + +## Platform support + +| Platform | CPU & GC | Memory (RSS) | Block I/O | Network I/O | Threads | +|----------|----------|--------------|-----------|-------------|---------| +| Linux | ✓ | ✓ | ✓ | ✓ (opt-in) | ✓ | +| macOS | ✓ | ✓ | — | ✓ | ✓ | +| Windows | ✓ | ✓ | ✓ | — | ✓ | +| Other | ✓ (RTS only) | — | — | — | ✓ | + +Fields that cannot be read on a given platform are reported as `0`. + +## Quick start + +```haskell +import Hermod.Tracing.Resources + +sample :: IO () +sample = do + mStats <- readResourceStats + case mStats of + Nothing -> putStrLn "resource sampling unavailable" + Just stats -> print stats +``` + +## Measured quantities + +| Field | Unit | Source | +|---------------|-----------------|-------------------------------------| +| `rCentiCpu` | centiseconds | `/proc/self/stat` (Linux), OS APIs | +| `rCentiGC` | centiseconds | GHC RTS stats | +| `rCentiMut` | centiseconds | GHC RTS stats | +| `rGcsMajor` | count | GHC RTS stats | +| `rGcsMinor` | count | GHC RTS stats | +| `rAlloc` | bytes | GHC RTS stats (cumulative) | +| `rLive` | bytes | GHC RTS stats (after last GC) | +| `rHeap` | bytes | GHC RTS stats (committed) | +| `rRSS` | bytes | OS kernel (resident set size) | +| `rCentiBlkIO` | centiseconds | `/proc/self/stat` (Linux only) | +| `rNetRd` | bytes | `/proc/self/net/netstat` (Linux, see below) | +| `rNetWr` | bytes | `/proc/self/net/netstat` (Linux, see below) | +| `rFsRd` | bytes | `/proc/self/io` (Linux), OS APIs | +| `rFsWr` | bytes | `/proc/self/io` (Linux), OS APIs | +| `rThreads` | count | GHC RTS stats (live green threads) | + +## Cabal flag: `with-netstat` + +Network I/O on Linux is read from `/proc/self/net/netstat`, which is a system-wide +counter file — the values represent all IP traffic on the host, not just this process. +Because parsing this file on every sample has a non-trivial cost, it is disabled by +default. + +Enable it with: + +``` +cabal build -f with-netstat +``` + +or in your `cabal.project`: + +``` +package hermod-trace-resources + flags: +with-netstat +``` + +## Integration with trace-dispatcher + +`ResourceStats` implements both `LogFormatting` (human-readable text and structured +JSON) and `MetaTrace` (namespace, severity, metric documentation). Wire it up like +any other traced value: + +```haskell +tr <- mkCardanoTracer trBase mempty ["Resources"] [EKGBackend] +configureTracers initialTraceConfig trDocumented [tr] +forM_ [1..] $ \_ -> do + threadDelay 5_000_000 -- 5 s + readResourceStats >>= mapM_ (traceWith tr) +``` diff --git a/hermod-trace-resources/bench/trace-resources-bench.hs b/hermod-trace-resources/bench/trace-resources-bench.hs new file mode 100644 index 0000000..ec11705 --- /dev/null +++ b/hermod-trace-resources/bench/trace-resources-bench.hs @@ -0,0 +1,11 @@ +import Criterion.Main +import Criterion.Types + +import Hermod.Tracing.Resources (readResourceStats) + + +main :: IO () +main = + defaultMainWith defaultConfig{ timeLimit = 15 } + [ bench "readResourceStats" (whnfIO readResourceStats) + ] diff --git a/hermod-trace-resources/cbits/os-support-darwin.c b/hermod-trace-resources/cbits/os-support-darwin.c new file mode 100644 index 0000000..a71aa18 --- /dev/null +++ b/hermod-trace-resources/cbits/os-support-darwin.c @@ -0,0 +1,289 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* +// includes for c_get_sys_disk_io_counters +// will require GHC options in cabal file: +// if os(darwin) +// ghc-options: -framework CoreFoundation -framework IOKit +// +#include +#include +#include +#include +#include +#include +#include +*/ +#include "os-support-darwin.h" + + +/* c_get_process_memory_info */ + +int c_get_process_memory_info2(struct mach_task_basic_info *counters, int pid) +{ + task_t task = MACH_PORT_NULL; + if (task_for_pid(current_task(), pid, &task) != KERN_SUCCESS) { + return -2; + } + struct mach_task_basic_info t_info; + mach_msg_type_number_t t_info_count = MACH_TASK_BASIC_INFO_COUNT; + if (task_info(task, MACH_TASK_BASIC_INFO, (task_info_t)counters, &t_info_count) != KERN_SUCCESS) { + return -1; + } + return 1; +} + + +/* c_get_host_info */ +/* currently this is not used +int c_get_host_info2(struct host_basic_info *counters) +{ + mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT; + mach_port_t host_port = mach_host_self(); + if (host_statistics(host_port, HOST_BASIC_INFO, + (host_info_t)counters, &count) != KERN_SUCCESS) { + return -2; + } + mach_port_deallocate(mach_task_self(), host_port); + return 1; +} */ + +/* c_get_boot_time */ + +long c_get_boot_time2() +{ + // copied from psutil + // fetch sysctl "kern.boottime" + static int request[2] = { CTL_KERN, KERN_BOOTTIME }; + struct timeval result; + size_t result_len = sizeof result; + time_t boot_time = 0; + + if (sysctl(request, 2, &result, &result_len, NULL, 0) == -1) { + return -1; + } + return result.tv_sec; +} + +/* c_get_sys_cpu_times */ +int c_get_sys_cpu_times2(CPU_TIMES *counters) +{ + mach_msg_type_number_t count = HOST_CPU_LOAD_INFO_COUNT; + host_cpu_load_info_data_t r_load; + + mach_port_t host_port = mach_host_self(); + if (host_statistics(host_port, HOST_CPU_LOAD_INFO, + (host_info_t)&r_load, &count) != KERN_SUCCESS) { + return -2; + } + mach_port_deallocate(mach_task_self(), host_port); + counters->usertime = r_load.cpu_ticks[CPU_STATE_USER] * 100000 / CLK_TCK; + counters->systime = r_load.cpu_ticks[CPU_STATE_SYSTEM] * 100000 / CLK_TCK; + counters->idletime = r_load.cpu_ticks[CPU_STATE_IDLE] * 100000 / CLK_TCK; + counters->nicetime = r_load.cpu_ticks[CPU_STATE_NICE] * 100000 / CLK_TCK; + return 1; +} + +/* c_get_proc_cpu_times */ + + + +/* c_get_sys_disk_io_counters */ +/* adapted from psutil */ +int c_get_sys_disk_io_counters2(DISK_COUNTERS *counters) { + counters->ndsks = 0; + // uncomment the following to extract disk I/O metrics + // requires to include the right headers (see top of this file) + // and link to frameworks on Darwin (also described there) +/* + int noutput = 0; + CFDictionaryRef parent_dict; + CFDictionaryRef props_dict; + CFDictionaryRef stats_dict; + io_registry_entry_t parent; + io_registry_entry_t disk; + io_iterator_t disk_list; + + // Get list of disks + if (IOServiceGetMatchingServices( + kIOMasterPortDefault, + IOServiceMatching(kIOMediaClass), + &disk_list) != kIOReturnSuccess) { + return -4; + } + + // Iterate over disks + while ((disk = IOIteratorNext(disk_list)) != 0) { + parent_dict = NULL; + props_dict = NULL; + stats_dict = NULL; + + if (IORegistryEntryGetParentEntry(disk, kIOServicePlane, &parent) != kIOReturnSuccess) { + return -3; + } + + if (IOObjectConformsTo(parent, "IOBlockStorageDriver")) { + + if (IORegistryEntryCreateCFProperties(disk, + (CFMutableDictionaryRef *) &parent_dict, + kCFAllocatorDefault, kNilOptions) != kIOReturnSuccess) { + IOObjectRelease(disk); + IOObjectRelease(parent); + return -2; + } + + if (IORegistryEntryCreateCFProperties(parent, + (CFMutableDictionaryRef *) &props_dict, + kCFAllocatorDefault, kNilOptions) != kIOReturnSuccess) { + CFRelease(props_dict); + IOObjectRelease(disk); + IOObjectRelease(parent); + return -1; + } + + const int kMaxDiskNameSize = 64; + char disk_name[kMaxDiskNameSize+1]; memset(disk_name, 0, kMaxDiskNameSize+1); + CFStringRef disk_name_ref = (CFStringRef) + CFDictionaryGetValue(parent_dict, CFSTR(kIOBSDNameKey)); + CFStringGetCString(disk_name_ref, disk_name, + kMaxDiskNameSize, + CFStringGetSystemEncoding()); + counters->dsknames[noutput] = strdup(disk_name); + + stats_dict = (CFDictionaryRef) + CFDictionaryGetValue(props_dict, CFSTR(kIOBlockStorageDriverStatisticsKey)); + if (stats_dict == NULL) { + continue; + } + CFNumberRef number; + int64_t t_num64 = 0; + // Get disk reads/writes + if ((number = (CFNumberRef)CFDictionaryGetValue(stats_dict, + CFSTR(kIOBlockStorageDriverStatisticsReadsKey)))) { + CFNumberGetValue(number, kCFNumberSInt64Type, &t_num64); + counters->dsks[noutput].reads = t_num64; + } + if ((number = (CFNumberRef)CFDictionaryGetValue(stats_dict, + CFSTR(kIOBlockStorageDriverStatisticsWritesKey)))) { + CFNumberGetValue(number, kCFNumberSInt64Type, &t_num64); + counters->dsks[noutput].writes = t_num64; + } + // Get disk bytes read/written + if ((number = (CFNumberRef)CFDictionaryGetValue(stats_dict, + CFSTR(kIOBlockStorageDriverStatisticsBytesReadKey)))) { + CFNumberGetValue(number, kCFNumberSInt64Type, &t_num64); + counters->dsks[noutput].read_bytes = t_num64; + } + if ((number = (CFNumberRef)CFDictionaryGetValue(stats_dict, + CFSTR(kIOBlockStorageDriverStatisticsBytesWrittenKey)))) { + CFNumberGetValue(number, kCFNumberSInt64Type, &t_num64); + counters->dsks[noutput].write_bytes = t_num64; + } + // Get disk time spent reading/writing (nanoseconds) + if ((number = (CFNumberRef)CFDictionaryGetValue(stats_dict, + CFSTR(kIOBlockStorageDriverStatisticsTotalReadTimeKey)))) { + CFNumberGetValue(number, kCFNumberSInt64Type, &t_num64); + counters->dsks[noutput].read_time = t_num64; + } + if ((number = (CFNumberRef)CFDictionaryGetValue(stats_dict, + CFSTR(kIOBlockStorageDriverStatisticsTotalWriteTimeKey)))) { + CFNumberGetValue(number, kCFNumberSInt64Type, &t_num64); + counters->dsks[noutput].write_time = t_num64; + } + + CFRelease(parent_dict); + IOObjectRelease(parent); + CFRelease(props_dict); + IOObjectRelease(disk); + + noutput++; + if (noutput >= MAX_DISK_COUNTERS) { break; } + } + } // while + IOObjectRelease (disk_list); + counters->ndsks = noutput; +*/ + return 1; +} + +/* c_get_sys_network_io_counters */ +/* adapted from psutil */ +int c_get_sys_network_io_counters2(NET_IO *counters) { + counters->nifs = 0; + int noutput = 0; + char *msghdrbuf = NULL, *end_of_list, *next; + struct if_msghdr *ifm; + int mib[6]; + mib[0] = CTL_NET; // networking subsystem + mib[1] = PF_ROUTE; // type of information + mib[2] = 0; // protocol (IPPROTO_xxx) + mib[3] = 0; // address family + mib[4] = NET_RT_IFLIST2; // operation + mib[5] = 0; + size_t buflen; + if (sysctl(mib, 6, NULL, &buflen, NULL, 0) < 0) { + return -4; + } + + msghdrbuf = malloc(buflen); + if (msghdrbuf == NULL) { + return -3; + } + if (sysctl(mib, 6, msghdrbuf, &buflen, NULL, 0) < 0) { + return -2; + } + + char nmbuf[12+1]; + + end_of_list = msghdrbuf + buflen; + for (next = msghdrbuf; next < end_of_list; ) { + ifm = (struct if_msghdr *)next; + next += ifm->ifm_msglen; + if (ifm->ifm_type == RTM_IFINFO2 && (ifm->ifm_flags & IFF_UP)) { + struct if_msghdr2 *if2m = (struct if_msghdr2 *)ifm; + // access name of interface and make a copy of the string + // struct sockaddr_dl *sdl = (struct sockaddr_dl *)(if2m + 1); + // memset(nmbuf, 0, 12+1); + // strncpy(nmbuf, sdl->sdl_data, MIN(12,sdl->sdl_nlen)); + // counters->ifnames[noutput] = strdup(nmbuf); + + // copy struct if_data64 + memcpy(&counters->ifs[noutput], &if2m->ifm_data, sizeof(struct if_data64)); + noutput++; + } + if (noutput >= MAX_NET_IO) { break; } + } + counters->nifs = noutput; + free(msghdrbuf); + return 1; +} + +uint64_t c_get_process_cpu_time_microseconds(pid_t pid) { + struct proc_taskinfo ti; + + int ret = proc_pidinfo(pid, PROC_PIDTASKINFO, 0, &ti, sizeof(ti)); + + if (ret <= 0) { + perror("proc_pidinfo"); + return 0; + } + + uint64_t user_microsec = ti.pti_total_user / 1e3; + uint64_t sys_microsec = ti.pti_total_system / 1e3; + + return user_microsec + sys_microsec; +} diff --git a/hermod-trace-resources/cbits/os-support-win.c b/hermod-trace-resources/cbits/os-support-win.c new file mode 100644 index 0000000..3635e34 --- /dev/null +++ b/hermod-trace-resources/cbits/os-support-win.c @@ -0,0 +1,96 @@ +#include +#include +//#include + +#include "os-support-win.h" + + +/* c_get_process_memory_info */ + +int c_get_process_memory_info2 (PROCESS_MEMORY_COUNTERS *counters, DWORD pid) { + HANDLE hProc; + hProc = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, + FALSE, pid ); + if (NULL == hProc) { return -2; } + BOOL result = GetProcessMemoryInfo(hProc, counters, sizeof(PROCESS_MEMORY_COUNTERS)); + CloseHandle(hProc); + if (result == 1) + return 0; + else + return -1; +} + + +/* c_get_io_counters */ + +int c_get_io_counters2 (IO_COUNTERS *counters, DWORD pid) { + HANDLE hProc; + hProc = OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, + FALSE, pid ); + if (NULL == hProc) { return -2; } + BOOL result = GetProcessIoCounters(hProc, counters); + CloseHandle(hProc); + return result; +} + + +// defined in 'psutil' +// #define LO_T 1e-7 +// #define HI_T 429.4967296 + +/* c_get_sys_cpu_times */ +int c_get_sys_cpu_times2 (CPU_TIMES *cputimes) { + FILETIME usert={0,0}, kernelt={0,0}, idlet={0,0}; + if (! GetSystemTimes(&idlet, &kernelt, &usert) ) { + return -2; + } + cputimes->usertime = ((ULONGLONG)usert.dwHighDateTime << 32 | usert.dwLowDateTime) / 10; + ULONGLONG kerneltime = ((ULONGLONG)kernelt.dwHighDateTime << 32 | kernelt.dwLowDateTime) / 10; + cputimes->idletime = ((ULONGLONG)idlet.dwHighDateTime << 32 | idlet.dwLowDateTime) / 10; + cputimes->systime = kerneltime - cputimes->idletime; + return 1; +} + +/* c_get_proc_cpu_times */ +int c_get_proc_cpu_times2 (CPU_TIMES *cputimes, DWORD pid) { + HANDLE hProc; + hProc = OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, + FALSE, pid ); + if (NULL == hProc) { return -2; } + FILETIME usert={0,0}, kernelt={0,0}, createt={0,0}, exitt={0,0}; + if (! GetProcessTimes(hProc, &createt, &exitt, &kernelt, &usert) ) { + return -1; + } + CloseHandle(hProc); + /* FILETIME is a structured of two 32 bit counters to form a 64 bit time in 100 ns units */ + /* divide by 10 to get microseconds */ + /* start time: Jan-1 1601 UTC */ + cputimes->usertime = ((ULONGLONG)usert.dwHighDateTime << 32 | usert.dwLowDateTime) / 10; + cputimes->systime = ((ULONGLONG)kernelt.dwHighDateTime << 32 | kernelt.dwLowDateTime) / 10; + // return time since process start in "idletime" + cputimes->idletime = ((ULONGLONG)createt.dwHighDateTime << 32 | createt.dwLowDateTime) / 10; + return 1; +} + +/* c_get_system_info */ +int c_get_system_info2 (SYSTEM_INFO *sysinfo) { + GetSystemInfo (sysinfo); + return 1; +} + +/* c_get_win_bits */ +int c_get_win_bits2 (DWORD pid) { + BOOL res; + HANDLE hProc; + hProc = OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, + FALSE, pid ); + if (NULL == hProc) { return -2; } + if (! IsWow64Process(hProc, &res)) { + return -1; + } + CloseHandle(hProc); + if (res) { + return 32; + } + return 64; +} diff --git a/hermod-trace-resources/hermod-trace-resources.cabal b/hermod-trace-resources/hermod-trace-resources.cabal new file mode 100644 index 0000000..947467c --- /dev/null +++ b/hermod-trace-resources/hermod-trace-resources.cabal @@ -0,0 +1,114 @@ +cabal-version: 3.0 + +name: hermod-trace-resources +version: 1.0.0 +synopsis: OS resource sampling for the Hermod tracing system +description: Provides a single IO action, 'readResourceStats', that samples + the calling process's resource usage (CPU, GC, memory, I/O, + threads) and returns it as a 'LogFormatting' / 'MetaTrace' value + ready to feed into the trace-dispatcher pipeline. + Supports Linux, macOS, and Windows; falls back to GHC RTS metrics + only on other platforms. +category: Cardano, + Trace, +copyright: 2021-2023 Input Output Global Inc (IOG), 2023-2026 Intersect. +author: Juergen Nicklisch-Franken +maintainer: operations@iohk.io +license: Apache-2.0 +license-files: LICENSE + NOTICE +extra-source-files: include/os-support-darwin.h + include/os-support-win.h +extra-doc-files: CHANGELOG.md + README.md + +Flag with-netstat + Description: Enable netstat values in Linux resource traces (potentially expensive) + Manual: True + Default: False + +common project-config + default-language: Haskell2010 + +library + import: project-config + hs-source-dirs: src + exposed-modules: Hermod.Tracing.Resources + other-modules: Hermod.Tracing.Resources.Types + Hermod.Tracing.Resources.Dummy + + default-extensions: OverloadedStrings + CPP + build-depends: base >=4.12 && <5 + , trace-dispatcher + , text + , aeson >= 2.1.0.0 + + if os(windows) + build-depends: Win32 + else + build-depends: unix + + ghc-options: -Wall + -Wcompat + -Wincomplete-uni-patterns + -Wincomplete-record-updates + -Wpartial-fields + -Widentities + -Wredundant-constraints + -Wmissing-export-lists + -Wno-incomplete-patterns + + if flag(with-netstat) + CPP-options: -DWITH_NETSTAT + + if os(linux) + other-modules: Hermod.Tracing.Resources.Linux + if os(windows) + other-modules: Hermod.Tracing.Resources.Windows + c-sources: cbits/os-support-win.c + include-dirs: include/ + cc-options: -DPSAPI_VERSION=2 + if os(darwin) + other-modules: Hermod.Tracing.Resources.Darwin + c-sources: cbits/os-support-darwin.c + include-dirs: include/ + +test-suite trace-resources-test + import: project-config + type: exitcode-stdio-1.0 + hs-source-dirs: test + main-is: trace-resources-test.hs + default-extensions: OverloadedStrings + build-depends: base >=4.12 && <5 + , trace-dispatcher + , text + , aeson + , QuickCheck + , tasty + , tasty-quickcheck + , hermod-trace-resources + ghc-options: "-with-rtsopts=-T" + -Wall + -Wcompat + -Wincomplete-uni-patterns + -Wincomplete-record-updates + -Wpartial-fields + -Widentities + -Wredundant-constraints + -Wmissing-export-lists + -Wno-incomplete-patterns + +benchmark bench + import: project-config + type: exitcode-stdio-1.0 + main-is: trace-resources-bench.hs + hs-source-dirs: bench + build-depends: base >=4.12 && <5 + , criterion + , hermod-trace-resources + ghc-options: + -Wall -Wcompat -Wincomplete-record-updates + -Wincomplete-uni-patterns -Wpartial-fields -Wredundant-constraints + -Wunused-packages -threaded -rtsopts -O2 + "-with-rtsopts=-T" diff --git a/hermod-trace-resources/include/os-support-darwin.h b/hermod-trace-resources/include/os-support-darwin.h new file mode 100644 index 0000000..8a92980 --- /dev/null +++ b/hermod-trace-resources/include/os-support-darwin.h @@ -0,0 +1,38 @@ +#include +#include +#include +#include + +typedef struct _CPU_TIMES { + int64_t usertime; + int64_t systime; + int64_t idletime; + int64_t nicetime; +} CPU_TIMES; + +#define MAX_NET_IO 32 +typedef struct _NET_IO { + u_int32_t nifs; + char* ifnames[MAX_NET_IO]; + struct if_data64 ifs[MAX_NET_IO]; +} NET_IO; + +typedef struct _DISK_INFO { + int64_t reads, writes; + int64_t read_bytes, write_bytes; + int64_t read_time, write_time; // nanoseconds +} DISK_INFO; +#define MAX_DISK_COUNTERS 32 +typedef struct _DISK_COUNTERS { + u_int32_t ndsks; + char* dsknames[MAX_DISK_COUNTERS]; + DISK_INFO dsks[MAX_DISK_COUNTERS]; +} DISK_COUNTERS; + +int c_get_process_memory_info2 (struct mach_task_basic_info *counters, int pid); +//int c_get_host_info (struct host_basic_info *counters); +long c_get_boot_time2(); +int c_get_sys_cpu_times2(CPU_TIMES *counters); +int c_get_sys_network_io_counters2(NET_IO *counters); +int c_get_sys_disk_io_counters2(DISK_COUNTERS *counters); +uint64_t c_get_process_cpu_time_microseconds(pid_t); diff --git a/hermod-trace-resources/include/os-support-win.h b/hermod-trace-resources/include/os-support-win.h new file mode 100644 index 0000000..333ef6a --- /dev/null +++ b/hermod-trace-resources/include/os-support-win.h @@ -0,0 +1,14 @@ + +typedef struct _CPU_TIMES { + ULONGLONG usertime; + ULONGLONG systime; + ULONGLONG idletime; +} CPU_TIMES; + +int c_get_process_memory_info2 (PROCESS_MEMORY_COUNTERS *counters, DWORD pid); +int c_get_io_counters2 (IO_COUNTERS *counters, DWORD pid); +int c_get_sys_cpu_times2 (CPU_TIMES *cputimes); +int c_get_proc_cpu_times2 (CPU_TIMES *cputimes, DWORD pid); +int c_get_system_info2 (SYSTEM_INFO *sysinfo); +int c_get_win_bits2 (DWORD pid); + diff --git a/hermod-trace-resources/src/Hermod/Tracing/Resources.hs b/hermod-trace-resources/src/Hermod/Tracing/Resources.hs new file mode 100644 index 0000000..f9cc288 --- /dev/null +++ b/hermod-trace-resources/src/Hermod/Tracing/Resources.hs @@ -0,0 +1,44 @@ +{-# LANGUAGE CPP #-} + +-- | OS-level and RTS resource sampling for the Hermod tracing system. +-- +-- This is the only public module of the package. Import it and call +-- 'readResourceStats' once per sampling interval to obtain a 'ResourceStats' +-- snapshot. The value carries 'LogFormatting' and 'MetaTrace' instances and +-- can be fed directly into any @Trace IO ResourceStats@. +-- +-- Platform support: +-- +-- * __Linux__ — CPU, GC, memory (RSS), block I/O, filesystem I/O, network (opt-in), threads. +-- * __macOS__ — CPU, GC, memory (RSS), network I/O, threads; disk I/O stubbed. +-- * __Windows__ — CPU, GC, memory (RSS), block I/O, threads. +-- * __Other__ — GHC RTS metrics (CPU, GC, threads) only; OS fields report 0. +module Hermod.Tracing.Resources + ( Resources(..) + , ResourceStats + , readResourceStats + ) where + + +import Hermod.Tracing.Resources.Types +#if defined(linux_HOST_OS) +import qualified Hermod.Tracing.Resources.Linux as Platform +#elif defined(mingw32_HOST_OS) +import qualified Hermod.Tracing.Resources.Windows as Platform +#elif defined(darwin_HOST_OS) +import qualified Hermod.Tracing.Resources.Darwin as Platform +#else +import qualified Hermod.Tracing.Resources.Dummy as Platform +#endif + + +-- | Sample resource usage of the current process. +-- +-- Returns 'Nothing' only when the underlying OS interface is unavailable +-- (e.g. @\/proc@ not mounted). Under normal operating conditions this +-- always returns 'Just'. +-- +-- The returned 'ResourceStats' is a @'Resources' 'Word64'@; fields that +-- cannot be measured on the current platform are set to @0@. +readResourceStats :: IO (Maybe ResourceStats) +readResourceStats = Platform.readResourceStatsInternal diff --git a/hermod-trace-resources/src/Hermod/Tracing/Resources/Darwin.hsc b/hermod-trace-resources/src/Hermod/Tracing/Resources/Darwin.hsc new file mode 100644 index 0000000..e26cba9 --- /dev/null +++ b/hermod-trace-resources/src/Hermod/Tracing/Resources/Darwin.hsc @@ -0,0 +1,114 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE ForeignFunctionInterface #-} + +module Hermod.Tracing.Resources.Darwin + ( readResourceStatsInternal + ) where + +#include "os-support-darwin.h" + +import Data.Word (Word64) +import Foreign.C.Types +import Foreign.Marshal.Alloc +import Foreign.Marshal.Error +import Foreign.Ptr +import Foreign.Storable +import qualified GHC.Stats as GhcStats +import System.Posix.Process (getProcessID) +import System.Posix.Types (ProcessID) +import Hermod.Tracing.Resources.Types + +{- type aliases -} +type MACH_VM_SIZE_T = Word64 +data TIME_VALUE_T = TIME_VALUE_T Word64 Word64 + +{- memory information -} + +{- mach/task_info.h +struct time_value { + integer_t seconds; + integer_t microseconds; +}; +struct mach_task_basic_info { + mach_vm_size_t virtual_size; /* virtual memory size (bytes) */ + mach_vm_size_t resident_size; /* resident memory size (bytes) */ + mach_vm_size_t resident_size_max; /* maximum resident memory size (bytes) */ + time_value_t user_time; /* total user run time for + * terminated threads */ + time_value_t system_time; /* total system run time for + * terminated threads */ + policy_t policy; /* default policy for new threads */ + integer_t suspend_count; /* suspend count for task */ +}; -} + +data MachTaskBasicInfo = MachTaskBasicInfo + { _virtual_size :: !MACH_VM_SIZE_T + , _resident_size :: !MACH_VM_SIZE_T + , _resident_size_max :: !MACH_VM_SIZE_T + , _user_time :: !TIME_VALUE_T + , _system_time :: !TIME_VALUE_T + , _policy :: !Word64 + , _suspend_count :: !Word64 + } + +instance Storable TIME_VALUE_T where + alignment _ = #const offsetof(struct {char x__; struct time_value (y__); }, y__) + sizeOf _ = #size struct time_value + peek ptr = TIME_VALUE_T + <$> (#peek struct time_value, seconds) ptr + <*> (#peek struct time_value, microseconds) ptr + poke _ _ = pure () + +instance Storable MachTaskBasicInfo where + alignment _ = #const offsetof(struct {char x__; struct mach_task_basic_info (y__); }, y__) + sizeOf _ = #size struct mach_task_basic_info + peek ptr = MachTaskBasicInfo + <$> (#peek struct mach_task_basic_info, virtual_size) ptr + <*> (#peek struct mach_task_basic_info, resident_size) ptr + <*> (#peek struct mach_task_basic_info, resident_size_max) ptr + <*> (#peek struct mach_task_basic_info, user_time) ptr + <*> (#peek struct mach_task_basic_info, system_time) ptr + <*> (#peek struct mach_task_basic_info, policy) ptr + <*> (#peek struct mach_task_basic_info, suspend_count) ptr + poke _ _ = pure () + +foreign import ccall unsafe c_get_process_memory_info2 :: Ptr MachTaskBasicInfo -> CInt -> IO CInt + +foreign import ccall unsafe c_get_process_cpu_time_microseconds :: CInt -> IO Word64 + + +getMemoryInfo :: ProcessID -> IO MachTaskBasicInfo +getMemoryInfo pid = + allocaBytes 128 $ \ptr -> do + throwIfNeg_ (\res -> "c_get_process_memory_info2: failure returned: " ++ show (pred res)) + (succ <$> c_get_process_memory_info2 ptr (fromIntegral pid)) + peek ptr + +readResourceStatsInternal :: IO (Maybe ResourceStats) +readResourceStatsInternal = getProcessID >>= \pid -> do + rts <- GhcStats.getRTSStats + mem <- getMemoryInfo pid + cpuTimeMicro <- c_get_process_cpu_time_microseconds (fromIntegral pid) + pure . Just $ + Resources + { rCentiCpu = usToCenti cpuTimeMicro + , rCentiGC = nsToCenti $ GhcStats.gc_cpu_ns rts + , rCentiMut = nsToCenti $ GhcStats.mutator_cpu_ns rts + , rGcsMajor = fromIntegral $ GhcStats.major_gcs rts + , rGcsMinor = fromIntegral $ GhcStats.gcs rts - GhcStats.major_gcs rts + , rAlloc = GhcStats.allocated_bytes rts + , rLive = GhcStats.gcdetails_live_bytes $ GhcStats.gc rts + , rHeap = GhcStats.gcdetails_mem_in_use_bytes $ GhcStats.gc rts + , rRSS = _resident_size mem + , rCentiBlkIO = 0 + , rNetRd = 0 + , rNetWr = 0 + , rFsRd = 0 + , rFsWr = 0 + , rThreads = 0 + } + where + nsToCenti :: GhcStats.RtsTime -> Word64 + nsToCenti = fromIntegral . (`div` 10000000) + usToCenti :: Word64 -> Word64 + usToCenti = (`div` 10000) diff --git a/hermod-trace-resources/src/Hermod/Tracing/Resources/Dummy.hs b/hermod-trace-resources/src/Hermod/Tracing/Resources/Dummy.hs new file mode 100644 index 0000000..e35ca0b --- /dev/null +++ b/hermod-trace-resources/src/Hermod/Tracing/Resources/Dummy.hs @@ -0,0 +1,39 @@ +module Hermod.Tracing.Resources.Dummy + ( + readResourceStatsInternal + ) where + +import Hermod.Tracing.Resources.Types + +import Data.Word +import qualified GHC.Stats as GhcStats +import System.CPUTime + + +readResourceStatsInternal :: IO (Maybe ResourceStats) +readResourceStatsInternal = do + cpu <- getCPUTime + rts <- GhcStats.getRTSStats + pure $ Just $ + Resources + { rCentiCpu = intToCenti cpu + , rCentiGC = nsToCenti $ GhcStats.gc_cpu_ns rts + , rCentiMut = nsToCenti $ GhcStats.mutator_cpu_ns rts + , rGcsMajor = fromIntegral $ GhcStats.major_gcs rts + , rGcsMinor = fromIntegral $ GhcStats.gcs rts - GhcStats.major_gcs rts + , rAlloc = GhcStats.allocated_bytes rts + , rLive = GhcStats.gcdetails_live_bytes $ GhcStats.gc rts + , rRSS = 0 + , rHeap = 0 + , rCentiBlkIO = 0 + , rNetRd = 0 + , rNetWr = 0 + , rFsRd = 0 + , rFsWr = 0 + , rThreads = 0 + } + where + nsToCenti :: GhcStats.RtsTime -> Word64 + nsToCenti = fromIntegral . (`div` 10000000) + intToCenti :: Integer -> Word64 + intToCenti = fromIntegral . (`div` 10000000) diff --git a/hermod-trace-resources/src/Hermod/Tracing/Resources/Linux.hs b/hermod-trace-resources/src/Hermod/Tracing/Resources/Linux.hs new file mode 100644 index 0000000..63292a3 --- /dev/null +++ b/hermod-trace-resources/src/Hermod/Tracing/Resources/Linux.hs @@ -0,0 +1,153 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE RecordWildCards #-} + +module Hermod.Tracing.Resources.Linux + ( + readResourceStatsInternal + ) where + +import Hermod.Tracing.Resources.Types +import Data.Maybe (fromMaybe) +import qualified Data.Text as T +import qualified Data.Text.IO as T (readFile) +import qualified Data.Text.Read as T (decimal) +import Data.Word +import qualified GHC.Stats as GhcStats +import System.Posix.Files (fileMode, getFileStatus, intersectFileModes, ownerReadMode) + +-- * Disk IO stats: +-- /proc/[pid]/io (since kernel 2.6.20) +-- This file contains I/O statistics for the process, for example: +-- +-- # cat /proc/3828/io +-- rchar: 323934931 +-- wchar: 323929600 +-- syscr: 632687 +-- syscw: 632675 +-- read_bytes: 0 +-- write_bytes: 323932160 +-- cancelled_write_bytes: 0 +-- +-- The fields are as follows: +-- +-- rchar: characters read +-- The number of bytes which this task has caused to be read from storage. This is simply the sum +-- of bytes which this process passed to read(2) and similar system calls. It includes things such +-- as terminal I/O and is unaffected by whether or not actual physical disk I/O was required (the +-- read might have been satisfied from pagecache). +-- +-- wchar: characters written +-- The number of bytes which this task has caused, or shall cause to be written to disk. Similar +-- caveats apply here as with rchar. +-- +-- syscr: read syscalls +-- Attempt to count the number of read I/O operations-that is, system calls such as read(2) and +-- pread(2). +-- +-- syscw: write syscalls +-- Attempt to count the number of write I/O operations-that is, system calls such as write(2) and +-- pwrite(2). +-- +-- read_bytes: bytes read +-- Attempt to count the number of bytes which this process really did cause to be fetched from the +-- storage layer. This is accurate for block-backed filesystems. +-- +-- write_bytes: bytes written +-- Attempt to count the number of bytes which this process caused to be sent to the storage layer. +-- +-- cancelled_write_bytes: +-- The big inaccuracy here is truncate. If a process writes 1MB to a file and then deletes the +-- file, it will in fact perform no writeout. But it will have been accounted as having caused 1MB +-- of write. In other words: this field represents the number of bytes which this process caused +-- to not happen, by truncating pagecache. A task can cause "negative" I/O too. If this task +-- truncates some dirty pagecache, some I/O which another task has been accounted for (in its +-- write\_bytes) will not be happening. +-- +-- Note: In the current implementation, things are a bit racy on 32-bit systems: if process A reads +-- process B's /proc/[pid]/io while process B is updating one of these 64-bit counters, process A could +-- see an intermediate result. +-- +-- Permission to access this file is governed by a ptrace access mode PTRACE\_MODE\_READ\_FSCREDS check; see +-- ptrace(2). +-- +readProcBlockInOut :: IO (Word64, Word64) +readProcBlockInOut = do + fields <- readProcList "/proc/self/io" + case -- We're only interested in 'read_bytes' & 'write_bytes': + fmap fromInteger . take 3 . drop 9 $ fields of + [fsRd, _, fsWr] -> pure (fsRd, fsWr) + _ -> pure (0, 0) + +-- * Network stats: +-- grep IpExt /proc//net/netstat +-- IpExt: InNoRoutes InTruncatedPkts InMcastPkts OutMcastPkts InBcastPkts OutBcastPkts InOctets OutOctets InMcastOctets OutMcastOctets InBcastOctets OutBcastOctets InCsumErrors InNoECTPkts InECT1Pkts InECT0Pkts InCEPkts +-- IpExt: 0 0 20053 8977 2437 23 3163525943 196480057 2426648 1491754 394285 5523 0 3513269 0 217426 0 +-- +readProcNetInOut :: IO (Word64, Word64) +#ifdef WITH_NETSTAT +readProcNetInOut = do + fields <- T.words . fourthLine . T.lines <$> T.readFile "/proc/self/net/netstat" + case -- We're only interested in 'InOctets' & 'OutOctets': + fmap readMaybeText . take 2 . drop 7 $ fields of + [Just netIn, Just netOut] -> pure (netIn, netOut) + _ -> pure (0, 0) + where + -- Assumption: 'IpExt:' values are on the fourth line of how the kernel displays the buffer + fourthLine ls = case drop 3 ls of + l:_ -> l + _ -> T.empty +#else +readProcNetInOut = pure (0, 0) +#endif + +-- | TODO we have to expand the |readMemStats| function +-- to read full data from |proc| +readResourceStatsInternal :: IO (Maybe ResourceStats) +readResourceStatsInternal = do + rts <- GhcStats.getRTSStats + net <- readProcNetInOut + fs <- readProcBlockInOut + mkProcStats rts net fs . fmap fromIntegral <$> readProcList "/proc/self/stat" + where + mkProcStats :: GhcStats.RTSStats -> (Word64, Word64) -> (Word64, Word64) -> [Word64] -> Maybe ResourceStats + mkProcStats rts + (rNetRd, rNetWr) + (rFsRd, rFsWr) + (_:_:_:_:_:_:_:_:_:_ -- 00-09 + :_:_:_:user:sys:_:_:_:_:rThreads -- 10-19 + :_:_:_:rss:_:_:_:_:_:_ -- 20-29 + :_:_:_:_:_:_:_:_:_:_ -- 30-39 + :_:rCentiBlkIO:_rest) = -- 40-42 + Just $ Resources + { rCentiCpu = user + sys + , rCentiGC = nsToCenti $ GhcStats.gc_cpu_ns rts + , rCentiMut = nsToCenti $ GhcStats.mutator_cpu_ns rts + , rGcsMajor = fromIntegral $ GhcStats.major_gcs rts + , rGcsMinor = fromIntegral $ GhcStats.gcs rts - GhcStats.major_gcs rts + , rAlloc = GhcStats.allocated_bytes rts + , rLive = GhcStats.gcdetails_live_bytes $ GhcStats.gc rts + , rHeap = GhcStats.gcdetails_mem_in_use_bytes $ GhcStats.gc rts + , rRSS = rss * 4096 -- TODO: this is really PAGE_SIZE. + , .. + } + mkProcStats _ _ _ _ = Nothing + nsToCenti :: GhcStats.RtsTime -> Word64 + nsToCenti = floor . (/ (10000000 :: Double)) . fromIntegral + +readProcList :: FilePath -> IO [Integer] +readProcList fp = do + fs <- getFileStatus fp + if readable fs + then do + cs <- T.readFile fp + return $ map (fromMaybe 0 . readMaybeText) (T.words cs) + else + return [] + where + readable fs = intersectFileModes (fileMode fs) ownerReadMode == ownerReadMode + +readMaybeText :: Integral a => T.Text -> Maybe a +readMaybeText t = + case T.decimal t of + Right (v, _) -> Just v + _ -> Nothing diff --git a/hermod-trace-resources/src/Hermod/Tracing/Resources/Types.hs b/hermod-trace-resources/src/Hermod/Tracing/Resources/Types.hs new file mode 100644 index 0000000..56f73cd --- /dev/null +++ b/hermod-trace-resources/src/Hermod/Tracing/Resources/Types.hs @@ -0,0 +1,186 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE RecordWildCards #-} + +module Hermod.Tracing.Resources.Types + ( Resources(..) + , ResourceStats + ) where + + +import Cardano.Logging + +import Data.Aeson +import Data.Text (pack) +import Data.Word +import GHC.Generics (Generic) + +-- | A snapshot of resource usage for the current process, parameterised over +-- the numeric type @a@. The concrete alias 'ResourceStats' fixes @a ~ Word64@. +-- +-- Fields that cannot be measured on the current platform are set to @0@. +data Resources a + = Resources + { rCentiCpu :: !a + -- ^ CPU time in centiseconds (1\/100 s) since process start, + -- from @\/proc\/self\/stat@ on Linux or equivalent OS API. + , rCentiGC :: !a + -- ^ CPU centiseconds spent in the GHC garbage collector (RTS stats). + , rCentiMut :: !a + -- ^ CPU centiseconds spent in the mutator, i.e. application code (RTS stats). + , rGcsMajor :: !a + -- ^ Number of major (full-heap) GC runs since process start (RTS stats). + , rGcsMinor :: !a + -- ^ Number of minor GC runs since process start (RTS stats). + , rAlloc :: !a + -- ^ Cumulative bytes allocated in the heap since process start (RTS stats). + , rLive :: !a + -- ^ Live heap bytes immediately after the last GC run (RTS stats). + , rHeap :: !a + -- ^ Committed heap bytes (total heap size reserved from the OS) (RTS stats). + , rRSS :: !a + -- ^ Resident set size in bytes: physical memory currently mapped to + -- the process (from the OS kernel). + , rCentiBlkIO :: !a + -- ^ Centiseconds spent waiting for block I/O (Linux @\/proc\/self\/stat@ only; + -- @0@ on other platforms). + , rNetRd :: !a + -- ^ IP packet bytes received since boot (Linux @\/proc\/self\/net\/netstat@, + -- only when the @with-netstat@ flag is enabled; @0@ otherwise). + , rNetWr :: !a + -- ^ IP packet bytes transmitted since boot (Linux @\/proc\/self\/net\/netstat@, + -- only when the @with-netstat@ flag is enabled; @0@ otherwise). + , rFsRd :: !a + -- ^ Filesystem bytes read by the process (from @\/proc\/self\/io@ on Linux + -- or equivalent OS API). + , rFsWr :: !a + -- ^ Filesystem bytes written by the process (from @\/proc\/self\/io@ on Linux + -- or equivalent OS API). + , rThreads :: !a + -- ^ Number of live GHC green threads (RTS stats). + } + deriving (Functor, Generic, Show) + +-- | Concrete snapshot of resource usage with all fields as 'Word64' counts. +type ResourceStats = Resources Word64 + +instance Applicative Resources where + pure a = Resources a a a a a a a a a a a a a a a + f <*> x = + Resources + { rCentiCpu = rCentiCpu f (rCentiCpu x) + , rCentiGC = rCentiGC f (rCentiGC x) + , rCentiMut = rCentiMut f (rCentiMut x) + , rGcsMajor = rGcsMajor f (rGcsMajor x) + , rGcsMinor = rGcsMinor f (rGcsMinor x) + , rAlloc = rAlloc f (rAlloc x) + , rLive = rLive f (rLive x) + , rHeap = rHeap f (rHeap x) + , rRSS = rRSS f (rRSS x) + , rCentiBlkIO = rCentiBlkIO f (rCentiBlkIO x) + , rNetRd = rNetRd f (rNetRd x) + , rNetWr = rNetWr f (rNetWr x) + , rFsRd = rFsRd f (rFsRd x) + , rFsWr = rFsWr f (rFsWr x) + , rThreads = rThreads f (rThreads x) + } + +instance FromJSON a => FromJSON (Resources a) where + parseJSON = genericParseJSON jsonEncodingOptions + +instance ToJSON a => ToJSON (Resources a) where + toJSON = genericToJSON jsonEncodingOptions + toEncoding = genericToEncoding jsonEncodingOptions + +jsonEncodingOptions :: Options +jsonEncodingOptions = defaultOptions + { fieldLabelModifier = drop 1 + , tagSingleConstructors = True + , sumEncoding = + TaggedObject + { tagFieldName = "kind" + , contentsFieldName = "contents" + } + } + +instance LogFormatting ResourceStats where + forHuman Resources{..} = "Resources:" + <> " Cpu Ticks " <> (pack . show) rCentiCpu + <> ", GC centiseconds " <> (pack . show) rCentiGC + <> ", Mutator centiseconds " <> (pack . show) rCentiMut + <> ", GCs major " <> (pack . show) rGcsMajor + <> ", GCs minor " <> (pack . show) rGcsMinor + <> ", Allocated bytes " <> (pack . show) rAlloc + <>" , GC live bytes " <> (pack . show) rLive + <> ", RTS heap " <> (pack . show) rHeap + <> ", RSS " <> (pack . show) rRSS + <> ", Net bytes read " <> (pack . show) rNetRd + <> " written " <> (pack . show) rNetWr + <> ", FS bytes read " <> (pack . show) rFsRd + <> " written " <> (pack . show) rFsWr + <> ", Threads " <> (pack . show) rThreads + <> "." + + forMachine _dtal rs = mconcat + [ "kind" .= String "ResourceStats" + , "CentiCpu" .= Number (fromIntegral $ rCentiCpu rs) + , "CentiGC" .= Number (fromIntegral $ rCentiGC rs) + , "CentiMut" .= Number (fromIntegral $ rCentiMut rs) + , "GcsMajor" .= Number (fromIntegral $ rGcsMajor rs) + , "GcsMinor" .= Number (fromIntegral $ rGcsMinor rs) + , "Alloc" .= Number (fromIntegral $ rAlloc rs) + , "Live" .= Number (fromIntegral $ rLive rs) + , "Heap" .= Number (fromIntegral $ rHeap rs) + , "RSS" .= Number (fromIntegral $ rRSS rs) + , "CentiBlkIO" .= Number (fromIntegral $ rCentiBlkIO rs) + , "NetRd" .= Number (fromIntegral $ rNetRd rs) + , "NetWr" .= Number (fromIntegral $ rNetWr rs) + , "FsRd" .= Number (fromIntegral $ rFsRd rs) + , "FsWr" .= Number (fromIntegral $ rFsWr rs) + , "Threads" .= Number (fromIntegral $ rThreads rs) + ] + + asMetrics rs = + [ IntM "Stat.cputicks" (fromIntegral $ rCentiCpu rs) + , IntM "RTS.gcticks" (fromIntegral $ rCentiGC rs) + , IntM "RTS.mutticks" (fromIntegral $ rCentiMut rs) + , IntM "RTS.gcMajorNum" (fromIntegral $ rGcsMajor rs) + , IntM "RTS.gcMinorNum" (fromIntegral $ rGcsMinor rs) + , IntM "RTS.alloc" (fromIntegral $ rAlloc rs) + , IntM "RTS.gcLiveBytes" (fromIntegral $ rLive rs) + , IntM "RTS.gcHeapBytes" (fromIntegral $ rHeap rs) + , IntM "Mem.resident" (fromIntegral $ rRSS rs) + , IntM "Stat.blkIOticks" (fromIntegral $ rCentiBlkIO rs) + , IntM "Stat.netRd" (fromIntegral $ rNetRd rs) + , IntM "Stat.netWr" (fromIntegral $ rNetWr rs) + , IntM "Stat.fsRd" (fromIntegral $ rFsRd rs) + , IntM "Stat.fsWr" (fromIntegral $ rFsWr rs) + , IntM "RTS.threads" (fromIntegral $ rThreads rs) + ] + +instance MetaTrace ResourceStats where + namespaceFor Resources {} = + Namespace [] ["Resources"] + severityFor (Namespace _ ["Resources"]) _ = Just Info + severityFor _ns _ = Nothing + documentFor (Namespace _ ["Resources"]) = Just "" + documentFor _ns = Nothing + metricsDocFor (Namespace _ ["Resources"]) = + [("Stat.cputicks", "Kernel-reported CPU ticks (1/100th of a second), since process start") + ,("RTS.gcticks", "RTS-reported CPU ticks spent on GC") + ,("RTS.mutticks", "RTS-reported CPU ticks spent on mutator") + ,("RTS.gcMajorNum", "Major GCs") + ,("RTS.gcMinorNum", "Minor GCs") + ,("RTS.alloc", "RTS-reported bytes allocated") + ,("RTS.gcLiveBytes", "RTS-reported live bytes") + ,("RTS.gcHeapBytes", "RTS-reported heap bytes") + ,("Mem.resident", "Kernel-reported RSS (resident set size)") + ,("Stat.netRd", "IP packet bytes read") + ,("Stat.netWr", "IP packet bytes written") + ,("Stat.fsRd", "FS bytes read") + ,("Stat.fsWr", "FS bytes written") + ,("RTS.threads","RTS green thread count")] + metricsDocFor _ns = [] + allNamespaces = [ Namespace [] ["Resources"]] diff --git a/hermod-trace-resources/src/Hermod/Tracing/Resources/Windows.hsc b/hermod-trace-resources/src/Hermod/Tracing/Resources/Windows.hsc new file mode 100644 index 0000000..b78a638 --- /dev/null +++ b/hermod-trace-resources/src/Hermod/Tracing/Resources/Windows.hsc @@ -0,0 +1,179 @@ + +{-# LANGUAGE CPP #-} +{-# LANGUAGE ForeignFunctionInterface #-} +{-# LANGUAGE TypeApplications #-} + +module Hermod.Tracing.Resources.Windows + ( readResourceStatsInternal + ) where + + +import Data.Word (Word64) +import Foreign.C.Types +import Foreign.Marshal.Alloc +import Foreign.Marshal.Error +import Foreign.Ptr +import Foreign.Storable +import qualified GHC.Stats as GhcStats +import System.Win32.Process (ProcessId, getCurrentProcessId) +import System.Win32.Types + +import Hermod.Tracing.Resources.Types + + +-- use PsAPI version 2 +#define PSAPI_VERSION 2 + +#include +#include + +#include "os-support-win.h" + + +{- type aliases -} +type ULONGLONG = Word64 + +{- memory information -} + +{- https://docs.microsoft.com/de-de/windows/win32/api/psapi/ns-psapi-process_memory_counters +typedef struct _PROCESS_MEMORY_COUNTERS { + DWORD cb; + DWORD PageFaultCount; + SIZE_T PeakWorkingSetSize; + SIZE_T WorkingSetSize; + SIZE_T QuotaPeakPagedPoolUsage; + SIZE_T QuotaPagedPoolUsage; + SIZE_T QuotaPeakNonPagedPoolUsage; + SIZE_T QuotaNonPagedPoolUsage; + SIZE_T PagefileUsage; + SIZE_T PeakPagefileUsage; +} PROCESS_MEMORY_COUNTERS; -} + +data ProcessMemoryCounters = ProcessMemoryCounters + { _cb :: DWORD + , _pageFaultCount :: DWORD + , _peakWorkingSetSize :: SIZE_T + , _workingSetSize :: SIZE_T + , _quotaPeakPagedPoolUsage :: SIZE_T + , _quotaPagedPoolUsage :: SIZE_T + , _quotaPeakNonPagedPoolUsage :: SIZE_T + , _quotaNonPagedPoolUsage :: SIZE_T + , _pagefileUsage :: SIZE_T + , _peakPagefileUsage :: SIZE_T + } + +instance Storable ProcessMemoryCounters where + alignment _ = #const offsetof(struct {char x__; PROCESS_MEMORY_COUNTERS (y__); }, y__) + sizeOf _ = #size PROCESS_MEMORY_COUNTERS + peek ptr = ProcessMemoryCounters + <$> (#peek PROCESS_MEMORY_COUNTERS, cb) ptr + <*> (#peek PROCESS_MEMORY_COUNTERS, PageFaultCount) ptr + <*> (#peek PROCESS_MEMORY_COUNTERS, PeakWorkingSetSize) ptr + <*> (#peek PROCESS_MEMORY_COUNTERS, WorkingSetSize) ptr + <*> (#peek PROCESS_MEMORY_COUNTERS, QuotaPeakPagedPoolUsage) ptr + <*> (#peek PROCESS_MEMORY_COUNTERS, QuotaPagedPoolUsage) ptr + <*> (#peek PROCESS_MEMORY_COUNTERS, QuotaPeakNonPagedPoolUsage) ptr + <*> (#peek PROCESS_MEMORY_COUNTERS, QuotaNonPagedPoolUsage) ptr + <*> (#peek PROCESS_MEMORY_COUNTERS, PagefileUsage) ptr + <*> (#peek PROCESS_MEMORY_COUNTERS, PeakPagefileUsage) ptr + poke _ _ = pure () + +foreign import ccall unsafe c_get_process_memory_info2 :: Ptr ProcessMemoryCounters -> CInt -> IO CInt + +{- I/O counters -} +{- https://docs.microsoft.com/de-de/windows/win32/api/winnt/ns-winnt-io_counters +typedef struct _IO_COUNTERS { + ULONGLONG ReadOperationCount; + ULONGLONG WriteOperationCount; + ULONGLONG OtherOperationCount; + ULONGLONG ReadTransferCount; + ULONGLONG WriteTransferCount; + ULONGLONG OtherTransferCount; +} IO_COUNTERS; -} + +data IOCounters = IOCounters + { _readOperationCount :: ULONGLONG + , _writeOperationCount :: ULONGLONG + , _otherOperationCount :: ULONGLONG + , _readTransferCount :: ULONGLONG + , _writeTransferCount :: ULONGLONG + , _otherTransferCount :: ULONGLONG + } + +instance Storable IOCounters where + alignment _ = #const offsetof(struct {char x__; IO_COUNTERS (y__); }, y__) + sizeOf _ = #size IO_COUNTERS + peek ptr = IOCounters + <$> (#peek IO_COUNTERS, ReadOperationCount) ptr + <*> (#peek IO_COUNTERS, WriteOperationCount) ptr + <*> (#peek IO_COUNTERS, OtherOperationCount) ptr + <*> (#peek IO_COUNTERS, ReadTransferCount) ptr + <*> (#peek IO_COUNTERS, WriteTransferCount) ptr + <*> (#peek IO_COUNTERS, OtherTransferCount) ptr + poke _ _ = pure () + +data CpuTimes = CpuTimes { + usertime :: ULONGLONG + , systime :: ULONGLONG + , _idletime :: ULONGLONG + } + +instance Storable CpuTimes where + alignment _ = #const offsetof(struct {char x__; CPU_TIMES (y__); }, y__) + sizeOf _ = #size CPU_TIMES + peek ptr = CpuTimes + <$> (#peek CPU_TIMES, usertime) ptr + <*> (#peek CPU_TIMES, systime) ptr + <*> (#peek CPU_TIMES, idletime) ptr + poke _ _ = pure () + +foreign import ccall unsafe c_get_proc_cpu_times2 :: Ptr CpuTimes -> CInt -> IO CInt + + +getMemoryInfo :: ProcessId -> IO ProcessMemoryCounters +getMemoryInfo pid = + allocaBytes 128 $ \ptr -> do + throwIfNeg_ (\res -> "c_get_process_memory_info2: failure returned: " ++ show res) + (c_get_process_memory_info2 ptr (fromIntegral pid)) + peek ptr + +readResourceStatsInternal :: IO (Maybe ResourceStats) +readResourceStatsInternal = getCurrentProcessId >>= \pid -> do + cpu <- getCpuTimes pid + mem <- getMemoryInfo pid + rts <- GhcStats.getRTSStats + pure . Just $ + Resources + { rCentiCpu = usecsToCenti $ usertime cpu + systime cpu + , rCentiGC = nsToCenti $ GhcStats.gc_cpu_ns rts + , rCentiMut = nsToCenti $ GhcStats.mutator_cpu_ns rts + , rGcsMajor = fromIntegral $ GhcStats.major_gcs rts + , rGcsMinor = fromIntegral $ GhcStats.gcs rts - GhcStats.major_gcs rts + , rAlloc = GhcStats.allocated_bytes rts + , rLive = GhcStats.gcdetails_live_bytes $ GhcStats.gc rts + , rHeap = GhcStats.gcdetails_mem_in_use_bytes $ GhcStats.gc rts + , rRSS = fromIntegral (_workingSetSize mem) + , rCentiBlkIO = 0 + , rNetRd = 0 + , rNetWr = 0 + , rFsRd = 0 + , rFsWr = 0 + , rThreads = 0 + } + where + usecsToCenti :: ULONGLONG -> Word64 + usecsToCenti ul = ul `div` 10000 + nsToCenti :: GhcStats.RtsTime -> Word64 + nsToCenti = fromIntegral . (`div` 10000000) + + +getCpuTimes :: ProcessId -> IO CpuTimes +getCpuTimes pid = + allocaBytes 128 $ \ptr -> do + res <- c_get_proc_cpu_times2 ptr (fromIntegral pid) + if res <= 0 + then do + putStrLn $ "c_get_proc_cpu_times2: failure returned: " ++ (show res) + return $ CpuTimes 0 0 0 + else + peek ptr diff --git a/hermod-trace-resources/test/trace-resources-test.hs b/hermod-trace-resources/test/trace-resources-test.hs new file mode 100644 index 0000000..bc08aa4 --- /dev/null +++ b/hermod-trace-resources/test/trace-resources-test.hs @@ -0,0 +1,56 @@ +import Cardano.Logging +import Hermod.Tracing.Resources + +import Control.Monad.IO.Class +import Data.IORef + +import Test.Tasty +import Test.Tasty.QuickCheck + + +main :: IO () +main = defaultMain tests + +tests :: TestTree +tests = localOption (QuickCheckTests 10) $ testGroup "trace-resources" + [ testProperty "resources available" playScript + ] + +-- | Plays a script in a single thread +playScript :: Property +playScript = ioProperty $ do + stdoutTrRef <- newIORef [] + stdoutTracer' <- testTracer stdoutTrRef + forwardTrRef <- newIORef [] + forwardTracer' <- testTracer forwardTrRef + ekgTrRef <- newIORef [] + ekgTracer' <- testTracer ekgTrRef + tr <- mkCardanoTracer + stdoutTracer' + forwardTracer' + (Just ekgTracer') + ["Test"] + confState <- emptyConfigReflection + configureTracers confState emptyTraceConfig [tr] + traceIt tr 10 + +traceIt :: Trace IO ResourceStats -> Int -> IO Bool +traceIt _ 0 = pure True +traceIt tr n = do + mbResources <- readResourceStats + case mbResources of + Nothing -> pure False + Just res -> do + traceWith tr res + traceIt tr (n - 1) + + +testTracer :: MonadIO m + => IORef [FormattedMessage] + -> m (Trace m FormattedMessage) +testTracer ioRef = liftIO $ do + pure $ Trace $ arrow $ emit output' + where + output' (LoggingContext{}, Right msg) = liftIO $ do + modifyIORef ioRef (msg :) + output' (LoggingContext{}, _) = pure () diff --git a/perSystem/devShells.nix b/perSystem/devShells.nix index 6951b29..085b47a 100644 --- a/perSystem/devShells.nix +++ b/perSystem/devShells.nix @@ -1,7 +1,7 @@ { inputs, ... }: { perSystem = { shellFor, pkgs, ... }: { devShells.default = shellFor { - packages = p: [ p.trace-dispatcher p.hermod-recon-framework ]; + packages = p: [ p.trace-dispatcher p.hermod-recon-framework p.hermod-trace-resources]; nativeBuildInputs = [ pkgs.jq diff --git a/perSystem/packages.nix b/perSystem/packages.nix index 72aaf6d..481e43a 100644 --- a/perSystem/packages.nix +++ b/perSystem/packages.nix @@ -3,14 +3,18 @@ let td = hsPkgs.trace-dispatcher; hrf = hsPkgs.hermod-recon-framework; + htr = hsPkgs.hermod-trace-resources; in { - packages.trace-dispatcher = td.components.library; - checks.trace-dispatcher-test = td.components.tests.trace-dispatcher-test; + packages.trace-dispatcher = td.components.library; + checks.trace-dispatcher-test = td.components.tests.trace-dispatcher-test; - packages.hermod-recon = hrf.components.exes.hermod-recon; - packages.hermod-recon-grep = hrf.components.exes.hermod-recon-grep; - checks.hermod-recon-test = hrf.components.tests.hermod-recon-test; - checks.hermod-recon-integration-test = hrf.components.tests.hermod-recon-integration-test; + packages.hermod-recon = hrf.components.exes.hermod-recon; + packages.hermod-recon-grep = hrf.components.exes.hermod-recon-grep; + checks.hermod-recon-test = hrf.components.tests.hermod-recon-test; + checks.hermod-recon-integration-test = hrf.components.tests.hermod-recon-integration-test; + + packages.hermod-trace-resources = htr.components.library; + checks.hermod-trace-resources-test = htr.components.tests.trace-resources-test; }; } diff --git a/perSystem/project.nix b/perSystem/project.nix index 254184d..4299e76 100644 --- a/perSystem/project.nix +++ b/perSystem/project.nix @@ -13,7 +13,8 @@ }; modules = [{ - packages.trace-dispatcher.ghcOptions = [ "-Werror" "-fno-ignore-asserts" ]; + packages.trace-dispatcher.ghcOptions = [ "-Werror" "-fno-ignore-asserts" ]; + packages.hermod-trace-resources.ghcOptions = [ "-Werror" "-fno-ignore-asserts" ]; packages.hermod-recon-framework.ghcOptions = [ "-Werror" "-fno-ignore-asserts" ]; }]; });