diff --git a/crates/fspy/examples/cli.rs b/crates/fspy/examples/cli.rs index 3fa315a7..1de519c0 100644 --- a/crates/fspy/examples/cli.rs +++ b/crates/fspy/examples/cli.rs @@ -29,13 +29,9 @@ async fn main() -> anyhow::Result<()> { for acc in termination.path_accesses.iter() { path_count += 1; + let path_str = format!("{:?}", acc.path); let mode_str = format!("{:?}", acc.mode); - csv_writer - .write_record(&[ - acc.path.to_cow_os_str().to_string_lossy().as_ref().as_bytes(), - mode_str.as_bytes(), - ]) - .await?; + csv_writer.write_record(&[path_str.as_bytes(), mode_str.as_bytes()]).await?; } csv_writer.flush().await?; diff --git a/crates/fspy/src/unix/mod.rs b/crates/fspy/src/unix/mod.rs index c42cf4de..72b5dac0 100644 --- a/crates/fspy/src/unix/mod.rs +++ b/crates/fspy/src/unix/mod.rs @@ -94,8 +94,8 @@ impl SpyImpl { &mut exec, ExecResolveConfig::search_path_enabled(None), &encoded_payload, - |path_access| { - exec_resolve_accesses.add(path_access); + |mode, path| { + exec_resolve_accesses.add(PathAccess { mode, path: path.into() }); }, ) .map_err(|err| SpawnError::Injection(err.into()))?; diff --git a/crates/fspy/src/unix/syscall_handler/mod.rs b/crates/fspy/src/unix/syscall_handler/mod.rs index 6326c9cc..bfed7d94 100644 --- a/crates/fspy/src/unix/syscall_handler/mod.rs +++ b/crates/fspy/src/unix/syscall_handler/mod.rs @@ -15,7 +15,7 @@ use fspy_seccomp_unotify::{ impl_handler, supervisor::handler::arg::{CStrPtr, Caller, Fd}, }; -use fspy_shared::ipc::{AccessMode, NativeStr, PathAccess}; +use fspy_shared::ipc::{AccessMode, PathAccess}; use crate::arena::PathAccessArena; @@ -63,7 +63,7 @@ impl SyscallHandler { libc::O_WRONLY => AccessMode::WRITE, _ => AccessMode::READ, }, - path: NativeStr::from_bytes(path.as_os_str().as_bytes()), + path: path.as_os_str().into(), }); Ok(()) } @@ -72,7 +72,7 @@ impl SyscallHandler { let path = fd.get_path(caller)?; self.arena.add(PathAccess { mode: AccessMode::READ_DIR, - path: NativeStr::from_bytes(path.as_bytes()), + path: OsStr::from_bytes(path.as_bytes()).into(), }); Ok(()) } diff --git a/crates/fspy/tests/oxlint.rs b/crates/fspy/tests/oxlint.rs index 28a33a80..8c05b165 100644 --- a/crates/fspy/tests/oxlint.rs +++ b/crates/fspy/tests/oxlint.rs @@ -102,9 +102,9 @@ declare const _foo: Foo; // Run oxlint without --type-aware first let accesses = track_oxlint(&tmpdir_path, &[""]).await?; let access_to_types_ts = accesses.iter().find(|access| { - let os_str = access.path.to_cow_os_str(); - os_str.as_encoded_bytes().ends_with(b"\\types.ts") - || os_str.as_encoded_bytes().ends_with(b"/types.ts") + access + .path + .strip_path_prefix(&tmpdir_path, |result| result.is_ok_and(|p| p.ends_with("types.ts"))) }); assert_eq!(access_to_types_ts, None, "oxlint should not read types.ts without --type-aware"); diff --git a/crates/fspy_e2e/src/main.rs b/crates/fspy_e2e/src/main.rs index 40d734c6..8e9f1c59 100644 --- a/crates/fspy_e2e/src/main.rs +++ b/crates/fspy_e2e/src/main.rs @@ -38,18 +38,18 @@ impl AccessCollector { } pub fn add(&mut self, access: PathAccess) { - let path = PathBuf::from(access.path.to_cow_os_str().to_os_string()); - if let Ok(relative_path) = path.strip_prefix(&self.dir) { - let relative_path = - relative_path.to_str().expect("relative path should be valid UTF-8").to_owned(); - match self.accesses.entry(relative_path) { - Entry::Vacant(vacant) => { - vacant.insert(access.mode); - } - Entry::Occupied(mut occupied) => { - let occupied_mode = occupied.get_mut(); - occupied_mode.insert(access.mode); - } + let Some(relative_path) = access.path.strip_path_prefix(&self.dir, |result| { + result.ok().and_then(|p| p.to_str().map(str::to_owned)) + }) else { + return; + }; + match self.accesses.entry(relative_path) { + Entry::Vacant(vacant) => { + vacant.insert(access.mode); + } + Entry::Occupied(mut occupied) => { + let occupied_mode = occupied.get_mut(); + occupied_mode.insert(access.mode); } } } diff --git a/crates/fspy_preload_unix/src/client/mod.rs b/crates/fspy_preload_unix/src/client/mod.rs index 1604f25c..9e12a435 100644 --- a/crates/fspy_preload_unix/src/client/mod.rs +++ b/crates/fspy_preload_unix/src/client/mod.rs @@ -2,7 +2,8 @@ pub mod convert; pub mod raw_exec; use std::{ - ffi::OsStr, fmt::Debug, num::NonZeroUsize, os::unix::ffi::OsStrExt as _, sync::OnceLock, + ffi::OsStr, fmt::Debug, num::NonZeroUsize, os::unix::ffi::OsStrExt as _, path::Path, + sync::OnceLock, }; use bincode::{enc::write::SizeWriter, encode_into_slice, encode_into_writer}; @@ -58,18 +59,19 @@ impl Client { Self { encoded_payload, ipc_sender } } - fn send(&self, path_access: PathAccess<'_>) -> anyhow::Result<()> { + fn send(&self, mode: fspy_shared::ipc::AccessMode, path: &Path) -> anyhow::Result<()> { let Some(ipc_sender) = &self.ipc_sender else { // ipc channel not available, skip sending return Ok(()); }; - let path = path_access.path.as_os_str().as_bytes(); - if path.starts_with(b"/dev/") + let path_bytes = path.as_os_str().as_bytes(); + if path_bytes.starts_with(b"/dev/") || (cfg!(target_os = "linux") - && (path.starts_with(b"/proc/") || path.starts_with(b"/sys/"))) + && (path_bytes.starts_with(b"/proc/") || path_bytes.starts_with(b"/sys/"))) { return Ok(()); } + let path_access = PathAccess { mode, path: path.into() }; let mut size_writer = SizeWriter::default(); encode_into_writer(path_access, &mut size_writer, BINCODE_CONFIG)?; @@ -93,8 +95,8 @@ impl Client { ) -> nix::Result { // SAFETY: raw_exec contains valid pointers to C strings and null-terminated arrays, as provided by the caller let mut exec = unsafe { raw_exec.to_exec() }; - let pre_exec = handle_exec(&mut exec, config, &self.encoded_payload, |path_access| { - self.send(path_access).unwrap(); + let pre_exec = handle_exec(&mut exec, config, &self.encoded_payload, |mode, path| { + self.send(mode, path).unwrap(); })?; RawExec::from_exec(exec, |raw_command| f(raw_command, pre_exec)) } @@ -112,7 +114,7 @@ impl Client { let Some(abs_path) = abs_path else { return Ok(Ok(())); }; - Ok(self.send(PathAccess { mode, path: OsStr::from_bytes(abs_path).into() })) + Ok(self.send(mode, Path::new(OsStr::from_bytes(abs_path)))) }) }??; diff --git a/crates/fspy_preload_windows/src/windows/detours/nt.rs b/crates/fspy_preload_windows/src/windows/detours/nt.rs index da85eb8b..ffcfd1ae 100644 --- a/crates/fspy_preload_windows/src/windows/detours/nt.rs +++ b/crates/fspy_preload_windows/src/windows/detours/nt.rs @@ -1,4 +1,4 @@ -use fspy_shared::ipc::{AccessMode, NativeStr, PathAccess}; +use fspy_shared::ipc::{AccessMode, NativePath, PathAccess}; use ntapi::ntioapi::{ FILE_INFORMATION_CLASS, NtQueryDirectoryFile, NtQueryFullAttributesFile, NtQueryInformationByName, PFILE_BASIC_INFORMATION, PFILE_NETWORK_OPEN_INFORMATION, @@ -157,7 +157,7 @@ unsafe fn handle_open(access_mode: impl ToAccessMode, path: impl ToAbsolutePath) // SAFETY: converting access mask to AccessMode via FFI-aware trait PathAccess { mode: access_mode.to_access_mode(), - path: NativeStr::from_wide(path), + path: NativePath::from_wide(path), } }, |wildcard_pos| { @@ -168,7 +168,7 @@ unsafe fn handle_open(access_mode: impl ToAccessMode, path: impl ToAbsolutePath) .unwrap_or(0); PathAccess { mode: AccessMode::READ_DIR, - path: NativeStr::from_wide(&path[..slash_pos]), + path: NativePath::from_wide(&path[..slash_pos]), } }, ); diff --git a/crates/fspy_shared/src/ipc/mod.rs b/crates/fspy_shared/src/ipc/mod.rs index 14d9bab7..755627d3 100644 --- a/crates/fspy_shared/src/ipc/mod.rs +++ b/crates/fspy_shared/src/ipc/mod.rs @@ -1,10 +1,12 @@ pub mod channel; -mod native_str; +mod native_path; +pub(crate) mod native_str; use std::fmt::Debug; use bincode::{BorrowDecode, Encode, config::Configuration}; use bitflags::bitflags; +pub use native_path::NativePath; pub use native_str::NativeStr; pub const BINCODE_CONFIG: Configuration = bincode::config::standard(); @@ -35,16 +37,16 @@ impl Debug for AccessMode { #[derive(Encode, BorrowDecode, Debug, Clone, Copy, PartialEq, Eq)] pub struct PathAccess<'a> { pub mode: AccessMode, - pub path: &'a NativeStr, + pub path: &'a NativePath, // TODO: add follow_symlinks (O_NOFOLLOW) } impl<'a> PathAccess<'a> { - pub fn read(path: impl Into<&'a NativeStr>) -> Self { + pub fn read(path: impl Into<&'a NativePath>) -> Self { Self { mode: AccessMode::READ, path: path.into() } } - pub fn read_dir(path: impl Into<&'a NativeStr>) -> Self { + pub fn read_dir(path: impl Into<&'a NativePath>) -> Self { Self { mode: AccessMode::READ_DIR, path: path.into() } } } diff --git a/crates/fspy_shared/src/ipc/native_path.rs b/crates/fspy_shared/src/ipc/native_path.rs new file mode 100644 index 00000000..072ed473 --- /dev/null +++ b/crates/fspy_shared/src/ipc/native_path.rs @@ -0,0 +1,104 @@ +#[cfg(unix)] +use std::os::unix::ffi::OsStrExt as _; +use std::{ + ffi::OsStr, + fmt::Debug, + path::{Path, StripPrefixError}, +}; + +use allocator_api2::alloc::Allocator; +use bincode::{BorrowDecode, Encode, de::BorrowDecoder, error::DecodeError}; +use bytemuck::TransparentWrapper; + +use super::native_str::NativeStr; + +/// An opaque path type used in [`super::PathAccess`]. +/// +/// On Windows, tracked paths are NT Object Manager paths (`\??` prefix), +/// whose raw data is not meaningful for direct consumption. The only way +/// to use the path is through [`strip_path_prefix`](NativePath::strip_path_prefix), +/// which normalizes platform differences and extracts a workspace-relative path. +#[derive(TransparentWrapper, Encode, PartialEq, Eq)] +#[repr(transparent)] +pub struct NativePath { + inner: NativeStr, +} + +impl NativePath { + #[cfg(windows)] + #[must_use] + pub fn from_wide(wide: &[u16]) -> &Self { + Self::wrap_ref(NativeStr::from_wide(wide)) + } + + pub fn clone_in<'new_alloc, A>(&self, alloc: &'new_alloc A) -> &'new_alloc Self + where + &'new_alloc A: Allocator, + { + Self::wrap_ref(self.inner.clone_in(alloc)) + } + + pub fn strip_path_prefix, R, F: FnOnce(Result<&Path, StripPrefixError>) -> R>( + &self, + base: P, + f: F, + ) -> R { + /// Strip the `\\?\`, `\\.\`, `\??\` prefix from a Windows path, if present. + /// Does nothing on non-Windows platforms. + /// + /// \\?\ and \\.\ are used to enable long paths and access to device paths. + /// \??\ is used in Nt* calls. + /// The resulting path is not necessarily valid or points to the same location, + /// but it's good enough for sanitizing paths in `NativePath::strip_path_prefix`. + #[cfg_attr( + not(windows), + expect( + clippy::missing_const_for_fn, + reason = "uses non-const for loop and strip_prefix on Windows" + ) + )] + fn strip_windows_path_prefix(p: &OsStr) -> &OsStr { + #[cfg(windows)] + { + use os_str_bytes::OsStrBytesExt as _; + for prefix in [r"\\?\", r"\\.\", r"\??\"] { + if let Some(stripped) = p.strip_prefix(prefix) { + return stripped; + } + } + p + } + #[cfg(not(windows))] + { + p + } + } + + let me = self.inner.to_cow_os_str(); + let me = strip_windows_path_prefix(&me); + let base = strip_windows_path_prefix(base.as_ref().as_os_str()); + f(Path::new(me).strip_prefix(base)) + } +} + +impl Debug for NativePath { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + ::fmt(&self.inner, f) + } +} + +impl<'a, C> BorrowDecode<'a, C> for &'a NativePath { + fn borrow_decode>( + decoder: &mut D, + ) -> Result { + let inner: &'a NativeStr = BorrowDecode::borrow_decode(decoder)?; + Ok(NativePath::wrap_ref(inner)) + } +} + +#[cfg(unix)] +impl<'a, S: AsRef + ?Sized> From<&'a S> for &'a NativePath { + fn from(value: &'a S) -> Self { + NativePath::wrap_ref(NativeStr::from_bytes(value.as_ref().as_bytes())) + } +} diff --git a/crates/fspy_shared/src/ipc/native_str.rs b/crates/fspy_shared/src/ipc/native_str.rs index 66f03811..d2d14e3a 100644 --- a/crates/fspy_shared/src/ipc/native_str.rs +++ b/crates/fspy_shared/src/ipc/native_str.rs @@ -6,12 +6,7 @@ use std::os::unix::ffi::OsStrExt as _; use std::os::windows::ffi::OsStrExt as _; #[cfg(windows)] use std::os::windows::ffi::OsStringExt as _; -use std::{ - borrow::Cow, - ffi::OsStr, - fmt::Debug, - path::{Path, StripPrefixError}, -}; +use std::{borrow::Cow, ffi::OsStr, fmt::Debug}; use allocator_api2::alloc::Allocator; use bincode::{ @@ -141,48 +136,6 @@ impl NativeStr { let data = data.leak::<'new_alloc>(); Self::wrap_ref(data) } - - pub fn strip_path_prefix, R, F: FnOnce(Result<&Path, StripPrefixError>) -> R>( - &self, - base: P, - f: F, - ) -> R { - /// Strip the `\\?\`, `\\.\`, `\??\` prefix from a Windows path, if present. - /// Does nothing on non-Windows platforms. - /// - /// \\?\ and \\.\ are used to enable long paths and access to device paths. - /// \??\ is used in Nt* calls. - /// The resulting path is not necessarily valid or points to the same location, - /// but it's good enough for sanitizing paths in `NativeStr::strip_path_prefix`. - #[cfg_attr( - not(windows), - expect( - clippy::missing_const_for_fn, - reason = "uses non-const for loop and strip_prefix on Windows" - ) - )] - fn strip_windows_path_prefix(p: &OsStr) -> &OsStr { - #[cfg(windows)] - { - use os_str_bytes::OsStrBytesExt as _; - for prefix in [r"\\?\", r"\\.\", r"\??\"] { - if let Some(stripped) = p.strip_prefix(prefix) { - return stripped; - } - } - p - } - #[cfg(not(windows))] - { - p - } - } - - let me = self.to_cow_os_str(); - let me = strip_windows_path_prefix(&me); - let base = strip_windows_path_prefix(base.as_ref().as_os_str()); - f(Path::new(me).strip_prefix(base)) - } } #[cfg(test)] diff --git a/crates/fspy_shared_unix/src/exec/mod.rs b/crates/fspy_shared_unix/src/exec/mod.rs index bfe8e3ea..d794b8ae 100644 --- a/crates/fspy_shared_unix/src/exec/mod.rs +++ b/crates/fspy_shared_unix/src/exec/mod.rs @@ -11,7 +11,7 @@ use std::{ }; use bstr::{BStr, BString, ByteSlice}; -use fspy_shared::ipc::{AccessMode, PathAccess}; +use fspy_shared::ipc::AccessMode; use nix::unistd::{AccessFlags, access}; use shebang::{ParseShebangOptions, parse_shebang}; @@ -103,7 +103,7 @@ impl Exec { /// - Shebang parsing fails due to I/O errors (`EIO`) pub fn resolve( &mut self, - mut on_path_access: impl FnMut(PathAccess<'_>), + mut on_path_access: impl FnMut(AccessMode, &Path), config: ExecResolveConfig, ) -> nix::Result<()> { if let Some(search_path) = config.search_path { @@ -120,10 +120,7 @@ impl Exec { self.program.as_ref(), path, |path| { - on_path_access(PathAccess { - path: OsStr::from_bytes(path).into(), - mode: AccessMode::READ, - }); + on_path_access(AccessMode::READ, Path::new(OsStr::from_bytes(path))); access(OsStr::from_bytes(path), AccessFlags::X_OK) }, |program| Ok(program.to_owned()), @@ -138,12 +135,12 @@ impl Exec { fn parse_shebang( &mut self, - mut on_path_access: impl FnMut(PathAccess<'_>), + mut on_path_access: impl FnMut(AccessMode, &Path), options: ParseShebangOptions, ) -> nix::Result<()> { if let Some(shebang) = parse_shebang( |path, buf| { - on_path_access(PathAccess::read(path)); + on_path_access(AccessMode::READ, path); peek_executable(path, buf) }, Path::new(OsStr::from_bytes(&self.program)), diff --git a/crates/fspy_shared_unix/src/spawn/mod.rs b/crates/fspy_shared_unix/src/spawn/mod.rs index 6b36fcdb..e4812577 100644 --- a/crates/fspy_shared_unix/src/spawn/mod.rs +++ b/crates/fspy_shared_unix/src/spawn/mod.rs @@ -6,9 +6,9 @@ mod os_specific; #[path = "./macos.rs"] mod os_specific; -use std::{ffi::OsStr, os::unix::ffi::OsStrExt}; +use std::{ffi::OsStr, os::unix::ffi::OsStrExt, path::Path}; -use fspy_shared::ipc::{AccessMode, PathAccess}; +use fspy_shared::ipc::AccessMode; #[doc(hidden)] #[cfg(target_os = "macos")] pub use os_specific::COREUTILS_FUNCTIONS as COREUTILS_FUNCTIONS_FOR_TEST; @@ -38,23 +38,19 @@ pub fn handle_exec( command: &mut Exec, config: ExecResolveConfig, encoded_payload: &EncodedPayload, - mut on_path_access: impl FnMut(PathAccess<'_>), + mut on_path_access: impl FnMut(AccessMode, &Path), ) -> nix::Result> { - let mut on_path_access = |path_access: PathAccess<'_>| { - if path_access.path.as_os_str().as_bytes().first() == Some(&b'/') { - on_path_access(path_access); + let mut on_path_access = |mode: AccessMode, path: &Path| { + if path.is_absolute() { + on_path_access(mode, path); } else { - let path = - std::path::absolute(path_access.path.as_os_str()).expect("Failed to get cwd"); - on_path_access(PathAccess { path: path.as_path().into(), mode: path_access.mode }); + let path = std::path::absolute(path).expect("Failed to get cwd"); + on_path_access(mode, &path); } }; command.resolve(&mut on_path_access, config)?; - on_path_access(PathAccess { - mode: AccessMode::READ, - path: OsStr::from_bytes(&command.program).into(), - }); + on_path_access(AccessMode::READ, Path::new(OsStr::from_bytes(&command.program))); os_specific::handle_exec(command, encoded_payload) }