From 0f0199fc53ce01c0a1d4e17bf73534a92580098c Mon Sep 17 00:00:00 2001 From: Daniel Golding Date: Sat, 24 May 2025 14:54:19 +0200 Subject: [PATCH 1/3] Fallback to /proc/PID/mem when process_vm_readv not in kernel Kernels may build with the CONFIG_CROSS_MEMORY_ATTACH configuration option not set, in this case process_vm_readv returns ENOSYS. The /proc/PID/mem file in the proc(5) filesystem serves the same purpose but is less efficient as the data must transfer through the kernel. We use this as a fallback. Signed-off-by: Daniel Golding --- news/240.feature.rst | 1 + src/pystack/_pystack/mem.cpp | 37 ++++++++++++++++++++++++++++++++++++ src/pystack/_pystack/mem.h | 6 +++++- 3 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 news/240.feature.rst diff --git a/news/240.feature.rst b/news/240.feature.rst new file mode 100644 index 00000000..053975b9 --- /dev/null +++ b/news/240.feature.rst @@ -0,0 +1 @@ +Support kernels that don't have CONFIG_CROSS_MEMORY_ATTACH set. diff --git a/src/pystack/_pystack/mem.cpp b/src/pystack/_pystack/mem.cpp index 4590fead..e25c69f0 100644 --- a/src/pystack/_pystack/mem.cpp +++ b/src/pystack/_pystack/mem.cpp @@ -228,6 +228,16 @@ ProcessMemoryManager::ProcessMemoryManager(pid_t pid) ssize_t ProcessMemoryManager::readChunk(remote_addr_t addr, size_t len, char* dst) const +{ + if (d_memfile.is_open()) { + return readChunkThroughMemFile(addr, len, dst); + } else { + return readChunkDirect(addr, len, dst); + } +} + +ssize_t +ProcessMemoryManager::readChunkDirect(remote_addr_t addr, size_t len, char* dst) const { struct iovec local[1]; struct iovec remote[1]; @@ -246,6 +256,9 @@ ProcessMemoryManager::readChunk(remote_addr_t addr, size_t len, char* dst) const throw InvalidRemoteAddress(); } else if (errno == EPERM) { throw std::runtime_error(PERM_MESSAGE); + } else if (errno == ENOSYS) { + LOG(DEBUG) << "process_vm_readv not compiled in kernel, falling back to /proc/PID/mem"; + return readChunkThroughMemFile(addr, len, dst); } throw std::system_error(errno, std::generic_category()); } @@ -256,6 +269,30 @@ ProcessMemoryManager::readChunk(remote_addr_t addr, size_t len, char* dst) const return result; } +ssize_t +ProcessMemoryManager::readChunkThroughMemFile(remote_addr_t addr, size_t len, char* dst) const +{ + if (!d_memfile.is_open()) { + std::string filepath = "/proc/" + std::to_string(d_pid) + "/mem"; + d_memfile.open(filepath, std::ifstream::binary); + if (!d_memfile) { + LOG(ERROR) << "Failed to open file " << filepath; + if (-1 == open(filepath.c_str(), O_RDONLY)) { + if (errno == EPERM || errno == EACCES) { + throw std::runtime_error(PERM_MESSAGE); + } + throw std::system_error(errno, std::generic_category()); + } + throw std::runtime_error("Failed to open " + filepath); + } + } + d_memfile.seekg(addr); + if (!d_memfile.read((char*)dst, len)) { + throw InvalidRemoteAddress(); + } + return d_memfile.gcount(); +} + ssize_t ProcessMemoryManager::copyMemoryFromProcess(remote_addr_t addr, size_t len, void* dst) const { diff --git a/src/pystack/_pystack/mem.h b/src/pystack/_pystack/mem.h index dd4ef557..c7ba7925 100644 --- a/src/pystack/_pystack/mem.h +++ b/src/pystack/_pystack/mem.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -12,7 +13,7 @@ #include #include -#include +#include "elf_common.h" namespace pystack { typedef uintptr_t remote_addr_t; @@ -174,9 +175,12 @@ class ProcessMemoryManager : public AbstractRemoteMemoryManager pid_t d_pid; std::vector d_vmaps; mutable LRUCache d_lru_cache; + mutable std::ifstream d_memfile{}; // Methods ssize_t readChunk(remote_addr_t addr, size_t len, char* dst) const; + ssize_t readChunkDirect(remote_addr_t addr, size_t len, char* dst) const; + ssize_t readChunkThroughMemFile(remote_addr_t addr, size_t len, char* dst) const; }; struct SimpleVirtualMap From 379dbd372c69f4c94eb67ea00fcd10378d0d1b46 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Tue, 15 Jul 2025 18:35:11 -0400 Subject: [PATCH 2/3] Use stdio rather than ifstream This should both be a bit faster, and makes the error reporting a bit simpler. And incidentally this fixes a file descriptor leak as well. Signed-off-by: Matt Wozniski --- src/pystack/_pystack/mem.cpp | 25 ++++++++++++------------- src/pystack/_pystack/mem.h | 5 +++-- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/pystack/_pystack/mem.cpp b/src/pystack/_pystack/mem.cpp index e25c69f0..4e14fbf9 100644 --- a/src/pystack/_pystack/mem.cpp +++ b/src/pystack/_pystack/mem.cpp @@ -17,7 +17,6 @@ namespace pystack { using elf_unique_ptr = std::unique_ptr>; -using file_unique_ptr = std::unique_ptr>; static ssize_t _process_vm_readv( @@ -229,7 +228,7 @@ ProcessMemoryManager::ProcessMemoryManager(pid_t pid) ssize_t ProcessMemoryManager::readChunk(remote_addr_t addr, size_t len, char* dst) const { - if (d_memfile.is_open()) { + if (d_memfile) { return readChunkThroughMemFile(addr, len, dst); } else { return readChunkDirect(addr, len, dst); @@ -272,25 +271,25 @@ ProcessMemoryManager::readChunkDirect(remote_addr_t addr, size_t len, char* dst) ssize_t ProcessMemoryManager::readChunkThroughMemFile(remote_addr_t addr, size_t len, char* dst) const { - if (!d_memfile.is_open()) { + if (!d_memfile) { std::string filepath = "/proc/" + std::to_string(d_pid) + "/mem"; - d_memfile.open(filepath, std::ifstream::binary); + d_memfile = file_unique_ptr(fopen(filepath.c_str(), "r"), fclose); if (!d_memfile) { - LOG(ERROR) << "Failed to open file " << filepath; - if (-1 == open(filepath.c_str(), O_RDONLY)) { - if (errno == EPERM || errno == EACCES) { - throw std::runtime_error(PERM_MESSAGE); - } - throw std::system_error(errno, std::generic_category()); + if (errno == EPERM || errno == EACCES) { + LOG(ERROR) << "Permission denied opening file " << filepath; + throw std::runtime_error(PERM_MESSAGE); } + LOG(ERROR) << "Failed to open file " << filepath << ": " << std::strerror(errno); throw std::runtime_error("Failed to open " + filepath); } } - d_memfile.seekg(addr); - if (!d_memfile.read((char*)dst, len)) { + fseeko(d_memfile.get(), addr, SEEK_SET); + if (static_cast(addr) != ftello(d_memfile.get()) + || len != fread(dst, 1, len, d_memfile.get())) + { throw InvalidRemoteAddress(); } - return d_memfile.gcount(); + return static_cast(len); } ssize_t diff --git a/src/pystack/_pystack/mem.h b/src/pystack/_pystack/mem.h index c7ba7925..b78b6674 100644 --- a/src/pystack/_pystack/mem.h +++ b/src/pystack/_pystack/mem.h @@ -2,7 +2,6 @@ #include #include -#include #include #include #include @@ -16,6 +15,8 @@ #include "elf_common.h" namespace pystack { + +using file_unique_ptr = std::unique_ptr>; typedef uintptr_t remote_addr_t; struct RemoteMemCopyError : public std::exception @@ -175,7 +176,7 @@ class ProcessMemoryManager : public AbstractRemoteMemoryManager pid_t d_pid; std::vector d_vmaps; mutable LRUCache d_lru_cache; - mutable std::ifstream d_memfile{}; + mutable file_unique_ptr d_memfile; // Methods ssize_t readChunk(remote_addr_t addr, size_t len, char* dst) const; From 769ac625ec6909bb20754ee4ef936afdcf15e62f Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Tue, 15 Jul 2025 19:25:04 -0400 Subject: [PATCH 3/3] tests: Exercise fallback to /proc/PID/mem Add an environment variable to force reads from `/proc/PID/mem` instead of using `process_vm_readv` in order to give us a way to exercise both paths on the same kernel. Signed-off-by: Matt Wozniski --- src/pystack/_pystack/mem.cpp | 2 +- tests/integration/test_smoke.py | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/pystack/_pystack/mem.cpp b/src/pystack/_pystack/mem.cpp index 4e14fbf9..825f3c62 100644 --- a/src/pystack/_pystack/mem.cpp +++ b/src/pystack/_pystack/mem.cpp @@ -228,7 +228,7 @@ ProcessMemoryManager::ProcessMemoryManager(pid_t pid) ssize_t ProcessMemoryManager::readChunk(remote_addr_t addr, size_t len, char* dst) const { - if (d_memfile) { + if (d_memfile || getenv("_PYSTACK_NO_PROCESS_VM_READV") != nullptr) { return readChunkThroughMemFile(addr, len, dst); } else { return readChunkDirect(addr, len, dst); diff --git a/tests/integration/test_smoke.py b/tests/integration/test_smoke.py index 1a3278c6..a370b40d 100644 --- a/tests/integration/test_smoke.py +++ b/tests/integration/test_smoke.py @@ -38,13 +38,23 @@ @pytest.mark.parametrize("method", STACK_METHODS) @pytest.mark.parametrize("blocking", [True, False], ids=["blocking", "non-blocking"]) -def test_simple_execution(method, blocking, tmpdir): +@pytest.mark.parametrize( + "use_process_vm_readv", [True, False], ids=["process_vm_readv", "/proc/PID/mem"] +) +def test_simple_execution(method, blocking, use_process_vm_readv, tmpdir, monkeypatch): """Test that we can retrieve the thread state of a single process. This test is specially useful to run under valgrind or similar tools and to quickly check that the very basic functionality works as expected. """ + # GIVEN + env_var = "_PYSTACK_NO_PROCESS_VM_READV" + if use_process_vm_readv: + monkeypatch.delenv(env_var, raising=False) + else: + monkeypatch.setenv(env_var, "1") + # WHEN with spawn_child_process( sys.executable, TEST_SINGLE_THREAD_FILE, tmpdir