From c40c792282bdaa529637134d175e2af56d8f4915 Mon Sep 17 00:00:00 2001 From: Ranjeet Kumar Ranjan Date: Wed, 13 Jul 2022 15:05:33 +0530 Subject: [PATCH 01/12] Add RealPath module --- src/Streamly/Coreutils/RealPath.hs | 21 +++++++++++++++++++++ streamly-coreutils.cabal | 1 + 2 files changed, 22 insertions(+) create mode 100644 src/Streamly/Coreutils/RealPath.hs diff --git a/src/Streamly/Coreutils/RealPath.hs b/src/Streamly/Coreutils/RealPath.hs new file mode 100644 index 00000000..96da01f9 --- /dev/null +++ b/src/Streamly/Coreutils/RealPath.hs @@ -0,0 +1,21 @@ +-- | +-- Module : Streamly.Coreutils.RealPath +-- Copyright : (c) 2022 Composewell Technologies +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com +-- Stability : experimental +-- Portability : GHC +-- +-- Returns resolved symbolic link target. + +module Streamly.Coreutils.RealPath + (realPath) +where + +import System.Directory (canonicalizePath ) + +-- | Make a path absolute, normalize the path, +-- and remove as many indirections from it as possible. + +realPath :: FilePath -> IO FilePath +realPath = canonicalizePath diff --git a/streamly-coreutils.cabal b/streamly-coreutils.cabal index 3892dd19..027d7687 100644 --- a/streamly-coreutils.cabal +++ b/streamly-coreutils.cabal @@ -130,6 +130,7 @@ library , Streamly.Coreutils.Mkdir , Streamly.Coreutils.Mv , Streamly.Coreutils.ReadLink + , Streamly.Coreutils.RealPath , Streamly.Coreutils.Rm , Streamly.Coreutils.Sh , Streamly.Coreutils.Sleep From bded37bb6868812f7ed438836628acf980d9ec35 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Mon, 20 Apr 2026 07:06:50 +0530 Subject: [PATCH 02/12] Add options to realPath --- src/Streamly/Coreutils/RealPath.hs | 80 +++++++++++++++++++++++++++--- 1 file changed, 73 insertions(+), 7 deletions(-) diff --git a/src/Streamly/Coreutils/RealPath.hs b/src/Streamly/Coreutils/RealPath.hs index 96da01f9..55cf7ad2 100644 --- a/src/Streamly/Coreutils/RealPath.hs +++ b/src/Streamly/Coreutils/RealPath.hs @@ -6,16 +6,82 @@ -- Stability : experimental -- Portability : GHC -- --- Returns resolved symbolic link target. +-- Resolve a path to its canonical form: make it absolute, normalize +-- @.@ and @..@ segments, and follow every symbolic link along the way. +-- Corresponds to the shell @realpath@ command. +-- +-- By default the path does not need to exist — nonexistent trailing +-- components are preserved in the result. Use 'existenceCheck' to +-- require existence (matching GNU @realpath@'s default). +-- +-- == Caveats +-- +-- * On Windows, @subst@ drives are resolved through to their +-- underlying path. +-- * On POSIX, two paths referring to the same object are not +-- guaranteed to canonicalize identically (bind mounts, +-- case-insensitive filesystems, etc.). module Streamly.Coreutils.RealPath - (realPath) + ( RealPathOptions + , defaultConfig + , existenceCheck + , realPath + ) where -import System.Directory (canonicalizePath ) +import Control.Monad (when) +import System.Directory (canonicalizePath, doesPathExist) +-- import System.IO.Error (ioError, userError) + +-- = Design notes +-- +-- * Thin wrapper over 'System.Directory.canonicalizePath' from the +-- @directory@ package. Per the package design notes, we don't +-- reimplement what @directory@ already does well. +-- +-- * 'canonicalizePath' diverges from GNU @realpath@ on nonexistent +-- paths: it canonicalizes as much as it can rather than failing. +-- 'existenceCheck' restores the GNU default via a pre-check. +-- +-- * Options-driven API following the package convention even though +-- only one flag is currently honored. Leaves room for +-- @--relative-to@, @-s@ (no-symlinks), etc. without breaking the +-- signature. +-- +-- * Throws 'IOError' rather than returning 'Maybe'. A canonicalization +-- failure is an exceptional condition, not a lookup miss — matches +-- the error-handling guidance in the package design notes. + +-- | Options for 'realPath'. Construct via 'defaultConfig' and compose +-- modifiers with @(.)@. +newtype RealPathOptions = RealPathOptions + { _requireExistence :: Bool + } --- | Make a path absolute, normalize the path, --- and remove as many indirections from it as possible. +-- | Default configuration: does not require the path to exist. +defaultConfig :: RealPathOptions +defaultConfig = RealPathOptions { _requireExistence = False } -realPath :: FilePath -> IO FilePath -realPath = canonicalizePath +-- | Require that the path exists. Corresponds to GNU @realpath -e@. +-- Throws 'IOError' if the path does not exist. +existenceCheck :: RealPathOptions -> RealPathOptions +existenceCheck opts = opts { _requireExistence = True } + +-- | Resolve a path to its canonical absolute form: make it absolute, +-- normalize @.@ and @..@, and follow all symbolic links. +-- Corresponds to the shell @realpath@ command. +-- +-- Throws 'IOError' if the path cannot be canonicalized, or — when +-- 'existenceCheck' is set — if the path does not exist. +-- +-- > realPath id "./foo/../bar" +-- > realPath existenceCheck "/etc/hostname" +realPath :: (RealPathOptions -> RealPathOptions) -> FilePath -> IO FilePath +realPath modifier path = do + let opts = modifier defaultConfig + when (_requireExistence opts) $ do + exists <- doesPathExist path + when (not exists) $ + ioError (userError ("realPath: path does not exist: " ++ path)) + canonicalizePath path From 9dafda81f49331a3584a61c4604e25a0e4f7be84 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Mon, 20 Apr 2026 17:24:35 +0530 Subject: [PATCH 03/12] Implement relative-to option in realpath --- src/Streamly/Coreutils/RealPath.hs | 77 ++++++++++++++++++++++-------- 1 file changed, 57 insertions(+), 20 deletions(-) diff --git a/src/Streamly/Coreutils/RealPath.hs b/src/Streamly/Coreutils/RealPath.hs index 55cf7ad2..5abc0e69 100644 --- a/src/Streamly/Coreutils/RealPath.hs +++ b/src/Streamly/Coreutils/RealPath.hs @@ -11,8 +11,10 @@ -- Corresponds to the shell @realpath@ command. -- -- By default the path does not need to exist — nonexistent trailing --- components are preserved in the result. Use 'existenceCheck' to --- require existence (matching GNU @realpath@'s default). +-- components are preserved in the result. Use 'pathMustExist' to +-- require existence (matching GNU @realpath@'s default). Use +-- 'relativeTo' to produce a path relative to a given base directory +-- (corresponds to @realpath --relative-to@). -- -- == Caveats -- @@ -21,17 +23,22 @@ -- * On POSIX, two paths referring to the same object are not -- guaranteed to canonicalize identically (bind mounts, -- case-insensitive filesystems, etc.). +-- * 'relativeTo' falls back to returning the canonicalized absolute +-- path unchanged when no common prefix exists with the base +-- (e.g. different drives on Windows). module Streamly.Coreutils.RealPath ( RealPathOptions , defaultConfig - , existenceCheck + , pathMustExist + , relativeTo , realPath ) where import Control.Monad (when) import System.Directory (canonicalizePath, doesPathExist) +import System.FilePath (makeRelative) -- import System.IO.Error (ioError, userError) -- = Design notes @@ -42,12 +49,15 @@ import System.Directory (canonicalizePath, doesPathExist) -- -- * 'canonicalizePath' diverges from GNU @realpath@ on nonexistent -- paths: it canonicalizes as much as it can rather than failing. --- 'existenceCheck' restores the GNU default via a pre-check. +-- 'pathMustExist' restores the GNU default via a pre-check. -- --- * Options-driven API following the package convention even though --- only one flag is currently honored. Leaves room for --- @--relative-to@, @-s@ (no-symlinks), etc. without breaking the --- signature. +-- * 'relativeTo' canonicalizes the base directory before diffing, +-- otherwise a base containing @..@ or symlinks would yield a +-- misleading relative path. +-- +-- * Options-driven API following the package convention. Leaves room +-- for further GNU flags (@-s@ no-symlinks, @-q@ quiet) without +-- breaking the signature. -- -- * Throws 'IOError' rather than returning 'Maybe'. A canonicalization -- failure is an exceptional condition, not a lookup miss — matches @@ -55,33 +65,60 @@ import System.Directory (canonicalizePath, doesPathExist) -- | Options for 'realPath'. Construct via 'defaultConfig' and compose -- modifiers with @(.)@. -newtype RealPathOptions = RealPathOptions +data RealPathOptions = RealPathOptions { _requireExistence :: Bool + , _relativeBase :: Maybe FilePath } --- | Default configuration: does not require the path to exist. +-- | Default configuration: does not require the path to exist and +-- returns an absolute path (no relative-to base). defaultConfig :: RealPathOptions -defaultConfig = RealPathOptions { _requireExistence = False } +defaultConfig = RealPathOptions + { _requireExistence = False + , _relativeBase = Nothing + } -- | Require that the path exists. Corresponds to GNU @realpath -e@. -- Throws 'IOError' if the path does not exist. -existenceCheck :: RealPathOptions -> RealPathOptions -existenceCheck opts = opts { _requireExistence = True } +pathMustExist :: RealPathOptions -> RealPathOptions +pathMustExist opts = opts { _requireExistence = True } + +-- | Return the canonical path relative to the given base directory. +-- Corresponds to GNU @realpath --relative-to=DIR@. +-- +-- The base is canonicalized before the relative path is computed, so +-- @..@ segments and symlinks in the base are handled correctly. +-- +-- If the canonical path and base share no common prefix (e.g. they +-- live on different Windows drives), the canonical absolute path is +-- returned unchanged. +relativeTo :: FilePath -> RealPathOptions -> RealPathOptions +relativeTo base opts = opts { _relativeBase = Just base } --- | Resolve a path to its canonical absolute form: make it absolute, --- normalize @.@ and @..@, and follow all symbolic links. +-- | Resolve a path to its canonical form: make it absolute, normalize +-- @.@ and @..@, and follow all symbolic links. -- Corresponds to the shell @realpath@ command. -- -- Throws 'IOError' if the path cannot be canonicalized, or — when --- 'existenceCheck' is set — if the path does not exist. +-- 'pathMustExist' is set — if the path does not exist. -- --- > realPath id "./foo/../bar" --- > realPath existenceCheck "/etc/hostname" -realPath :: (RealPathOptions -> RealPathOptions) -> FilePath -> IO FilePath +-- > realPath id "./foo/../bar" +-- > realPath pathMustExist "/etc/hostname" +-- > realPath (relativeTo "/home/alice") "/home/alice/docs/file" +-- > realPath (pathMustExist . relativeTo "/") "/etc/hostname" +realPath + :: (RealPathOptions -> RealPathOptions) + -> FilePath + -> IO FilePath realPath modifier path = do let opts = modifier defaultConfig when (_requireExistence opts) $ do exists <- doesPathExist path when (not exists) $ ioError (userError ("realPath: path does not exist: " ++ path)) - canonicalizePath path + canonical <- canonicalizePath path + case _relativeBase opts of + Nothing -> return canonical + Just base -> do + canonicalBase <- canonicalizePath base + return (makeRelative canonicalBase canonical) From 922be86db01800b7ec6144430251da0e4c5d50a6 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Mon, 20 Apr 2026 18:11:04 +0530 Subject: [PATCH 04/12] Add noSymLinks option --- src/Streamly/Coreutils/RealPath.hs | 206 ++++++++++++++++++++++++----- 1 file changed, 171 insertions(+), 35 deletions(-) diff --git a/src/Streamly/Coreutils/RealPath.hs b/src/Streamly/Coreutils/RealPath.hs index 5abc0e69..c237355d 100644 --- a/src/Streamly/Coreutils/RealPath.hs +++ b/src/Streamly/Coreutils/RealPath.hs @@ -10,11 +10,17 @@ -- @.@ and @..@ segments, and follow every symbolic link along the way. -- Corresponds to the shell @realpath@ command. -- --- By default the path does not need to exist — nonexistent trailing --- components are preserved in the result. Use 'pathMustExist' to --- require existence (matching GNU @realpath@'s default). Use --- 'relativeTo' to produce a path relative to a given base directory --- (corresponds to @realpath --relative-to@). +-- Call 'realPath' with @id@ for the default behavior, or compose +-- modifiers with @(.)@ to customize: +-- +-- * 'pathMustExist' - require the path to exist (GNU @realpath -e@). +-- * 'noSymlinks' - don't expand symbolic links; normalize @.@ and +-- @..@ lexically only (GNU @realpath -s@ / @--no-symlinks@). +-- * 'relativeTo' - produce a path relative to a given base directory +-- (GNU @realpath --relative-to=DIR@). +-- +-- Each modifier's Haddock describes the default that applies in its +-- absence. -- -- == Caveats -- @@ -26,86 +32,213 @@ -- * 'relativeTo' falls back to returning the canonicalized absolute -- path unchanged when no common prefix exists with the base -- (e.g. different drives on Windows). +-- * 'noSymlinks' is purely lexical - no filesystem access is made on +-- the path itself (only 'System.Directory.getCurrentDirectory' when +-- the path is relative). It does not check whether the path exists +-- unless combined with 'pathMustExist'. Because it's lexical, it +-- can give a different result than the default mode when the path +-- traverses through a symlink via @..@: @\/link\/..@ lexically +-- resolves to @\/@, but physically resolves to the parent of the +-- symlink's target. +-- * GNU @realpath@'s @-L@ / @--logical@ mode (resolve @..@ textually +-- before expanding symlinks) is not currently supported. Use +-- 'noSymlinks' if you want purely lexical normalization, or the +-- default physical mode otherwise. module Streamly.Coreutils.RealPath ( RealPathOptions - , defaultConfig , pathMustExist + , noSymlinks , relativeTo , realPath ) where import Control.Monad (when) -import System.Directory (canonicalizePath, doesPathExist) -import System.FilePath (makeRelative) +import System.Directory + (canonicalizePath, doesPathExist, makeAbsolute) +import System.FilePath + (makeRelative, splitDirectories, joinPath, isAbsolute) -- import System.IO.Error (ioError, userError) +-- $setup +-- >>> import Control.Exception (try, SomeException) +-- >>> import System.Directory (getCurrentDirectory, getTemporaryDirectory) +-- >>> import System.FilePath ((), isAbsolute) + -- = Design notes -- --- * Thin wrapper over 'System.Directory.canonicalizePath' from the --- @directory@ package. Per the package design notes, we don't --- reimplement what @directory@ already does well. +-- * Thin wrapper over 'System.Directory.canonicalizePath' for the +-- default physical mode; 'noSymlinks' uses 'makeAbsolute' plus a +-- custom @..@-collapsing walker. +-- +-- * Why a custom walker for 'noSymlinks'. We initially tried +-- 'System.FilePath.normalise', but its documentation is explicit: +-- "Does not remove \"..\", because of symlinks" - e.g. Posix: +-- @normalise "/a/../c" == "/a/../c"@. That is the correct default +-- for a symlink-aware normalizer but is the wrong semantics for +-- @realpath -s@, which explicitly asks for symlink-oblivious +-- lexical resolution. So we collapse @..@ ourselves with +-- 'lexicalCollapse' below. -- -- * 'canonicalizePath' diverges from GNU @realpath@ on nonexistent -- paths: it canonicalizes as much as it can rather than failing. -- 'pathMustExist' restores the GNU default via a pre-check. -- --- * 'relativeTo' canonicalizes the base directory before diffing, --- otherwise a base containing @..@ or symlinks would yield a --- misleading relative path. +-- * GNU @-L@ / @--logical@ (resolve @..@ textually, then expand +-- remaining symlinks) is intentionally not implemented. @directory@ +-- offers no primitive for it, and implementing it correctly +-- requires a per-component walker that canonicalizes surviving +-- components one at a time. Easy to get wrong on Windows edges, +-- and the demand is rare. Revisit if a real use case shows up. +-- +-- * 'pathMustExist' composes with 'noSymlinks': existence is checked +-- on the path as given, not the expanded form, matching GNU +-- @realpath -e -s@. -- --- * Options-driven API following the package convention. Leaves room --- for further GNU flags (@-s@ no-symlinks, @-q@ quiet) without --- breaking the signature. +-- * 'relativeTo' always canonicalizes the base physically (following +-- symlinks) regardless of 'noSymlinks'. Otherwise +-- @relativeTo "foo/../bar"@ with a lexical base would give +-- surprising results. If a future use case needs a lexical base, +-- add a separate modifier rather than overloading this one. -- -- * Throws 'IOError' rather than returning 'Maybe'. A canonicalization --- failure is an exceptional condition, not a lookup miss — matches +-- failure is an exceptional condition, not a lookup miss - matches -- the error-handling guidance in the package design notes. --- | Options for 'realPath'. Construct via 'defaultConfig' and compose --- modifiers with @(.)@. +-- | Options for 'realPath'. Users don't construct 'RealPathOptions' +-- directly - instead, pass @id@ for the default behavior, or a +-- modifier (or composition of modifiers with @(.)@) to 'realPath'. data RealPathOptions = RealPathOptions { _requireExistence :: Bool + , _expandSymlinks :: Bool , _relativeBase :: Maybe FilePath } --- | Default configuration: does not require the path to exist and --- returns an absolute path (no relative-to base). +-- Default configuration: the seed value that modifiers are composed +-- onto. Users supply @id@ (or a modifier chain) at the call site +-- rather than referring to this directly. defaultConfig :: RealPathOptions defaultConfig = RealPathOptions { _requireExistence = False + , _expandSymlinks = True , _relativeBase = Nothing } -- | Require that the path exists. Corresponds to GNU @realpath -e@. -- Throws 'IOError' if the path does not exist. +-- +-- Default (without this modifier): the path does not need to exist; +-- nonexistent trailing components are preserved in the result. +-- +-- Succeeds on an existing path (result is canonicalized and is +-- idempotent under another 'realPath'): +-- +-- >>> cwd <- getCurrentDirectory +-- >>> r1 <- realPath pathMustExist cwd +-- >>> r2 <- realPath pathMustExist r1 +-- >>> r1 == r2 +-- True +-- +-- Throws on a nonexistent path: +-- +-- >>> result <- try (realPath pathMustExist "/definitely/does/not/exist/xyzzy") :: IO (Either SomeException FilePath) +-- >>> either (const True) (const False) result +-- True pathMustExist :: RealPathOptions -> RealPathOptions pathMustExist opts = opts { _requireExistence = True } +-- | Don't expand symbolic links. The path is made absolute and +-- @.@\/@..@ segments are normalized lexically, but symlinks are left +-- in place. Corresponds to GNU @realpath -s@ / @--no-symlinks@. +-- +-- Default (without this modifier): symbolic links are fully expanded +-- (GNU @realpath@'s physical mode, @-P@). +-- +-- This is a purely lexical operation on the path string - no +-- filesystem access is made on the path components (only +-- 'System.Directory.getCurrentDirectory' when the input is relative). +-- +-- Collapses @..@ and @.@ textually: +-- +-- >>> tmp <- getTemporaryDirectory +-- >>> r <- realPath noSymlinks (tmp "a" ".." "b") +-- >>> r == tmp "b" +-- True +-- +-- Handles @.@ segments: +-- +-- >>> r <- realPath noSymlinks (tmp "." "x") +-- >>> r == tmp "x" +-- True +noSymlinks :: RealPathOptions -> RealPathOptions +noSymlinks opts = opts { _expandSymlinks = False } + -- | Return the canonical path relative to the given base directory. -- Corresponds to GNU @realpath --relative-to=DIR@. -- --- The base is canonicalized before the relative path is computed, so --- @..@ segments and symlinks in the base are handled correctly. +-- Default (without this modifier): an absolute path is returned. +-- +-- The base is canonicalized (physically, following symlinks) before +-- the relative path is computed, so @..@ segments and symlinks in the +-- base are handled correctly. -- -- If the canonical path and base share no common prefix (e.g. they -- live on different Windows drives), the canonical absolute path is -- returned unchanged. +-- +-- A path relative to itself is @\".\"@: +-- +-- >>> cwd <- getCurrentDirectory +-- >>> realPath (relativeTo cwd) cwd +-- "." relativeTo :: FilePath -> RealPathOptions -> RealPathOptions relativeTo base opts = opts { _relativeBase = Just base } --- | Resolve a path to its canonical form: make it absolute, normalize --- @.@ and @..@, and follow all symbolic links. +-- Collapse @.@ and @..@ segments lexically. On absolute paths, @..@ +-- at the root is dropped (you can't ascend above @\/@). On relative +-- paths, leading @..@ segments are preserved. +-- +-- Uses 'splitDirectories' / 'joinPath' from @filepath@ to stay +-- platform-correct on separator handling. +lexicalCollapse :: FilePath -> FilePath +lexicalCollapse p = + let parts = splitDirectories p + absolute = isAbsolute p + (root, rest) = + if absolute + then case parts of + (r:xs) -> (Just r, xs) + [] -> (Nothing, []) + else (Nothing, parts) + step acc "." = acc + step acc ".." = case acc of + -- Relative path: preserve leading .. + [] -> if absolute then [] else [".."] + (x:xs) | x == ".." -> "..":x:xs + | otherwise -> xs + step acc x = x : acc + collapsed = reverse (foldl step [] rest) + in case root of + Just r -> joinPath (r : collapsed) + Nothing -> if null collapsed then "." else joinPath collapsed + +-- | Resolve a path to its canonical form. -- Corresponds to the shell @realpath@ command. -- --- Throws 'IOError' if the path cannot be canonicalized, or — when --- 'pathMustExist' is set — if the path does not exist. +-- Pass @id@ for default behavior, or a modifier (or modifier chain +-- composed with @(.)@) to customize. Each modifier's Haddock +-- documents the default that applies in its absence. +-- +-- Throws 'IOError' if the path cannot be canonicalized, or - when +-- 'pathMustExist' is set - if the path does not exist. +-- +-- The default-mode result on an existing directory is absolute: -- --- > realPath id "./foo/../bar" --- > realPath pathMustExist "/etc/hostname" --- > realPath (relativeTo "/home/alice") "/home/alice/docs/file" --- > realPath (pathMustExist . relativeTo "/") "/etc/hostname" +-- >>> cwd <- getCurrentDirectory +-- >>> r <- realPath id cwd +-- >>> isAbsolute r +-- True realPath :: (RealPathOptions -> RealPathOptions) -> FilePath @@ -116,9 +249,12 @@ realPath modifier path = do exists <- doesPathExist path when (not exists) $ ioError (userError ("realPath: path does not exist: " ++ path)) - canonical <- canonicalizePath path + resolved <- + if _expandSymlinks opts + then canonicalizePath path + else fmap lexicalCollapse (makeAbsolute path) case _relativeBase opts of - Nothing -> return canonical + Nothing -> return resolved Just base -> do canonicalBase <- canonicalizePath base - return (makeRelative canonicalBase canonical) + return (makeRelative canonicalBase resolved) From de506bfa1489f4433ada19bb9ece4876a10de022 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Mon, 20 Apr 2026 18:26:40 +0530 Subject: [PATCH 05/12] Add a logical option --- src/Streamly/Coreutils/RealPath.hs | 70 ++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 13 deletions(-) diff --git a/src/Streamly/Coreutils/RealPath.hs b/src/Streamly/Coreutils/RealPath.hs index c237355d..68c7ac84 100644 --- a/src/Streamly/Coreutils/RealPath.hs +++ b/src/Streamly/Coreutils/RealPath.hs @@ -16,6 +16,8 @@ -- * 'pathMustExist' - require the path to exist (GNU @realpath -e@). -- * 'noSymlinks' - don't expand symbolic links; normalize @.@ and -- @..@ lexically only (GNU @realpath -s@ / @--no-symlinks@). +-- * 'logical' - resolve @..@ components lexically before expanding +-- symlinks (GNU @realpath -L@ / @--logical@). -- * 'relativeTo' - produce a path relative to a given base directory -- (GNU @realpath --relative-to=DIR@). -- @@ -40,15 +42,12 @@ -- traverses through a symlink via @..@: @\/link\/..@ lexically -- resolves to @\/@, but physically resolves to the parent of the -- symlink's target. --- * GNU @realpath@'s @-L@ / @--logical@ mode (resolve @..@ textually --- before expanding symlinks) is not currently supported. Use --- 'noSymlinks' if you want purely lexical normalization, or the --- default physical mode otherwise. module Streamly.Coreutils.RealPath ( RealPathOptions , pathMustExist , noSymlinks + , logical , relativeTo , realPath ) @@ -85,12 +84,21 @@ import System.FilePath -- paths: it canonicalizes as much as it can rather than failing. -- 'pathMustExist' restores the GNU default via a pre-check. -- --- * GNU @-L@ / @--logical@ (resolve @..@ textually, then expand --- remaining symlinks) is intentionally not implemented. @directory@ --- offers no primitive for it, and implementing it correctly --- requires a per-component walker that canonicalizes surviving --- components one at a time. Easy to get wrong on Windows edges, --- and the demand is rare. Revisit if a real use case shows up. +-- * 'logical' is implemented as @canonicalizePath . lexicalCollapse@: +-- collapse @..@ segments as text first, then let 'canonicalizePath' +-- expand whatever symlinks remain in the surviving components. This +-- matches GNU @-L@'s spec of "resolve @..@ before symlinks". Note +-- that symlinks /inside/ a symlink's target are still resolved +-- physically - @-L@ only governs @..@ in the input path. An earlier +-- version of this module deferred implementing @-L@ out of caution +-- about a custom per-component walker; once 'lexicalCollapse' was +-- written and tested for 'noSymlinks', 'logical' reduced to the +-- two-step composition above. +-- +-- * When 'logical' and 'noSymlinks' are both set, 'noSymlinks' wins: +-- no symlink expansion happens in either phase. 'logical' in that +-- combination is redundant (its lexical step is also what +-- 'noSymlinks' does). -- -- * 'pathMustExist' composes with 'noSymlinks': existence is checked -- on the path as given, not the expanded form, matching GNU @@ -112,6 +120,7 @@ import System.FilePath data RealPathOptions = RealPathOptions { _requireExistence :: Bool , _expandSymlinks :: Bool + , _logicalDots :: Bool , _relativeBase :: Maybe FilePath } @@ -122,6 +131,7 @@ defaultConfig :: RealPathOptions defaultConfig = RealPathOptions { _requireExistence = False , _expandSymlinks = True + , _logicalDots = False , _relativeBase = Nothing } @@ -174,6 +184,34 @@ pathMustExist opts = opts { _requireExistence = True } noSymlinks :: RealPathOptions -> RealPathOptions noSymlinks opts = opts { _expandSymlinks = False } +-- | Resolve @..@ components lexically before expanding symbolic +-- links. Corresponds to GNU @realpath -L@ / @--logical@. +-- +-- Default (without this modifier): @..@ is resolved physically, i.e. +-- symlinks in the path are expanded first and @..@ then applies to +-- the resolved location. With this modifier, @..@ is applied as text +-- first (so @\/link\/..@ becomes @\/@), and any symlinks remaining in +-- the surviving components are expanded afterwards. +-- +-- The two modes give the same result on paths that don't mix @..@ +-- with symlinks. They diverge on a path like @\/link\/..@ where +-- @\/link@ is a symlink: physical resolution follows the symlink and +-- then ascends from its target; logical resolution cancels the +-- @link@ and @..@ textually, giving @\/@. +-- +-- If combined with 'noSymlinks', 'noSymlinks' wins - no symlinks are +-- expanded in either phase. +-- +-- On a path without symlinks, logical mode is equivalent to default +-- mode: +-- +-- >>> tmp <- getTemporaryDirectory +-- >>> r <- realPath logical (tmp "a" ".." "b") +-- >>> r == tmp "b" +-- True +logical :: RealPathOptions -> RealPathOptions +logical opts = opts { _logicalDots = True } + -- | Return the canonical path relative to the given base directory. -- Corresponds to GNU @realpath --relative-to=DIR@. -- @@ -249,10 +287,16 @@ realPath modifier path = do exists <- doesPathExist path when (not exists) $ ioError (userError ("realPath: path does not exist: " ++ path)) + -- Three modes, in precedence order: + -- 1. noSymlinks: purely lexical (wins over logical if both set). + -- 2. logical: lexically collapse .., then expand symlinks. + -- 3. physical (default): canonicalizePath does everything. resolved <- - if _expandSymlinks opts - then canonicalizePath path - else fmap lexicalCollapse (makeAbsolute path) + if not (_expandSymlinks opts) + then fmap lexicalCollapse (makeAbsolute path) + else if _logicalDots opts + then fmap lexicalCollapse (makeAbsolute path) >>= canonicalizePath + else canonicalizePath path case _relativeBase opts of Nothing -> return resolved Just base -> do From f4270159b1c4d1104d24b4158441b7dad8fa0087 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Mon, 20 Apr 2026 19:28:12 +0530 Subject: [PATCH 06/12] Collapse logical and noSymlinks in a single option --- src/Streamly/Coreutils/RealPath.hs | 226 ++++++++++++++--------------- 1 file changed, 113 insertions(+), 113 deletions(-) diff --git a/src/Streamly/Coreutils/RealPath.hs b/src/Streamly/Coreutils/RealPath.hs index 68c7ac84..7feaec96 100644 --- a/src/Streamly/Coreutils/RealPath.hs +++ b/src/Streamly/Coreutils/RealPath.hs @@ -14,10 +14,9 @@ -- modifiers with @(.)@ to customize: -- -- * 'pathMustExist' - require the path to exist (GNU @realpath -e@). --- * 'noSymlinks' - don't expand symbolic links; normalize @.@ and --- @..@ lexically only (GNU @realpath -s@ / @--no-symlinks@). --- * 'logical' - resolve @..@ components lexically before expanding --- symlinks (GNU @realpath -L@ / @--logical@). +-- * 'resolveSymlinks' - control when (and whether) symbolic links +-- are expanded: 'TargetParents' (default, GNU @-P@), 'OriginalParents' +-- (GNU @-L@), or 'DontResolve' (GNU @-s@). -- * 'relativeTo' - produce a path relative to a given base directory -- (GNU @realpath --relative-to=DIR@). -- @@ -34,20 +33,20 @@ -- * 'relativeTo' falls back to returning the canonicalized absolute -- path unchanged when no common prefix exists with the base -- (e.g. different drives on Windows). --- * 'noSymlinks' is purely lexical - no filesystem access is made on --- the path itself (only 'System.Directory.getCurrentDirectory' when --- the path is relative). It does not check whether the path exists --- unless combined with 'pathMustExist'. Because it's lexical, it --- can give a different result than the default mode when the path --- traverses through a symlink via @..@: @\/link\/..@ lexically --- resolves to @\/@, but physically resolves to the parent of the --- symlink's target. +-- * 'resolveSymlinks' 'DontResolve' is purely lexical - no filesystem +-- access is made on the path itself (only +-- 'System.Directory.getCurrentDirectory' when the path is relative). +-- It does not check whether the path exists unless combined with +-- 'pathMustExist'. Because it's lexical, it can give a different +-- result than the default mode when the path traverses through a +-- symlink via @..@: @\/link\/..@ lexically resolves to @\/@, but +-- physically resolves to the parent of the symlink's target. module Streamly.Coreutils.RealPath ( RealPathOptions + , SymlinkResolution (..) , pathMustExist - , noSymlinks - , logical + , resolveSymlinks , relativeTo , realPath ) @@ -68,44 +67,45 @@ import System.FilePath -- = Design notes -- -- * Thin wrapper over 'System.Directory.canonicalizePath' for the --- default physical mode; 'noSymlinks' uses 'makeAbsolute' plus a --- custom @..@-collapsing walker. --- --- * Why a custom walker for 'noSymlinks'. We initially tried +-- default 'TargetParents' (physical) mode; 'OriginalParents' and 'DontResolve' +-- use 'makeAbsolute' plus a custom @..@-collapsing walker +-- ('lexicalCollapse'). +-- +-- * Why a single 'SymlinkResolution' enum instead of two flags. +-- Symlink expansion is a three-way choice, not two orthogonal +-- booleans. An earlier iteration exposed 'logical' and 'noSymlinks' +-- as separate modifiers, which required a precedence rule for +-- @logical . noSymlinks@ ('noSymlinks' won). Collapsing to one +-- enum makes the choice exclusive by construction - no precedence +-- rule needed, no way to express contradictory combinations. +-- +-- * Why a custom walker for the lexical modes. We initially tried -- 'System.FilePath.normalise', but its documentation is explicit: -- "Does not remove \"..\", because of symlinks" - e.g. Posix: -- @normalise "/a/../c" == "/a/../c"@. That is the correct default -- for a symlink-aware normalizer but is the wrong semantics for --- @realpath -s@, which explicitly asks for symlink-oblivious --- lexical resolution. So we collapse @..@ ourselves with --- 'lexicalCollapse' below. +-- @realpath -s@ and @-L@, which want symlink-oblivious lexical +-- resolution of @..@. So we collapse @..@ ourselves in +-- 'lexicalCollapse'. -- -- * 'canonicalizePath' diverges from GNU @realpath@ on nonexistent -- paths: it canonicalizes as much as it can rather than failing. -- 'pathMustExist' restores the GNU default via a pre-check. -- --- * 'logical' is implemented as @canonicalizePath . lexicalCollapse@: --- collapse @..@ segments as text first, then let 'canonicalizePath' --- expand whatever symlinks remain in the surviving components. This --- matches GNU @-L@'s spec of "resolve @..@ before symlinks". Note --- that symlinks /inside/ a symlink's target are still resolved --- physically - @-L@ only governs @..@ in the input path. An earlier --- version of this module deferred implementing @-L@ out of caution --- about a custom per-component walker; once 'lexicalCollapse' was --- written and tested for 'noSymlinks', 'logical' reduced to the --- two-step composition above. --- --- * When 'logical' and 'noSymlinks' are both set, 'noSymlinks' wins: --- no symlink expansion happens in either phase. 'logical' in that --- combination is redundant (its lexical step is also what --- 'noSymlinks' does). --- --- * 'pathMustExist' composes with 'noSymlinks': existence is checked --- on the path as given, not the expanded form, matching GNU --- @realpath -e -s@. +-- * 'OriginalParents' is implemented as +-- @canonicalizePath . lexicalCollapse . makeAbsolute@: collapse +-- @..@ as text first, then let 'canonicalizePath' expand whatever +-- symlinks remain in the surviving components. This matches GNU +-- @-L@'s spec of "resolve @..@ before symlinks". Note that +-- symlinks /inside/ a symlink's target are still resolved +-- physically - @-L@ only governs @..@ in the input path. +-- +-- * 'pathMustExist' composes with any 'SymlinkResolution': existence +-- is checked on the path as given, not on the expanded form, +-- matching GNU @realpath -e -s@ and @realpath -e -L@. -- -- * 'relativeTo' always canonicalizes the base physically (following --- symlinks) regardless of 'noSymlinks'. Otherwise +-- symlinks) regardless of the 'SymlinkResolution' mode. Otherwise -- @relativeTo "foo/../bar"@ with a lexical base would give -- surprising results. If a future use case needs a lexical base, -- add a separate modifier rather than overloading this one. @@ -114,14 +114,40 @@ import System.FilePath -- failure is an exceptional condition, not a lookup miss - matches -- the error-handling guidance in the package design notes. +-- | How @..@ and symbolic links interact when resolving a path. +-- The three modes differ on where a @..@ segment points when it +-- follows a symlink, and on whether symlinks are expanded at all. +-- +-- * 'TargetParents': @..@ means the parent of the symlink's +-- /target/. Symlinks are expanded first, so @..@ ascends from the +-- resolved location. Matches GNU @realpath@'s default physical +-- mode (@-P@). +-- * 'OriginalParents': @..@ means the parent in the /original/ path +-- you supplied - @..@ textually cancels the preceding segment, +-- regardless of whether that segment was a symlink. Remaining +-- symlinks in the surviving path are still expanded. Matches GNU +-- @realpath -L@ / @--logical@. +-- * 'DontResolve': no symlinks are expanded anywhere in the path. +-- @..@ is lexical (same as 'OriginalParents'), and symlinks in +-- other components are preserved as-is. Matches GNU @realpath -s@ +-- / @--no-symlinks@. +-- +-- The three modes produce the same result on paths that contain no +-- symlinks. 'TargetParents' and 'OriginalParents' diverge when a +-- symlink is followed by @..@; 'DontResolve' diverges from both +-- whenever the path contains any symlink. +data SymlinkResolution + = TargetParents + | OriginalParents + | DontResolve + -- | Options for 'realPath'. Users don't construct 'RealPathOptions' -- directly - instead, pass @id@ for the default behavior, or a -- modifier (or composition of modifiers with @(.)@) to 'realPath'. data RealPathOptions = RealPathOptions - { _requireExistence :: Bool - , _expandSymlinks :: Bool - , _logicalDots :: Bool - , _relativeBase :: Maybe FilePath + { _requireExistence :: Bool + , _symlinkResolution :: SymlinkResolution + , _relativeBase :: Maybe FilePath } -- Default configuration: the seed value that modifiers are composed @@ -129,10 +155,9 @@ data RealPathOptions = RealPathOptions -- rather than referring to this directly. defaultConfig :: RealPathOptions defaultConfig = RealPathOptions - { _requireExistence = False - , _expandSymlinks = True - , _logicalDots = False - , _relativeBase = Nothing + { _requireExistence = False + , _symlinkResolution = TargetParents + , _relativeBase = Nothing } -- | Require that the path exists. Corresponds to GNU @realpath -e@. @@ -158,59 +183,38 @@ defaultConfig = RealPathOptions pathMustExist :: RealPathOptions -> RealPathOptions pathMustExist opts = opts { _requireExistence = True } --- | Don't expand symbolic links. The path is made absolute and --- @.@\/@..@ segments are normalized lexically, but symlinks are left --- in place. Corresponds to GNU @realpath -s@ / @--no-symlinks@. +-- | Choose how @..@ and symbolic links interact. See +-- 'SymlinkResolution' for the three modes and a full explanation. -- --- Default (without this modifier): symbolic links are fully expanded --- (GNU @realpath@'s physical mode, @-P@). +-- Default (without this modifier): 'TargetParents' - @..@ ascends +-- from the symlink's target (GNU @realpath@'s physical mode, @-P@). -- --- This is a purely lexical operation on the path string - no --- filesystem access is made on the path components (only --- 'System.Directory.getCurrentDirectory' when the input is relative). +-- 'DontResolve' does not check whether the path exists unless +-- combined with 'pathMustExist'. -- --- Collapses @..@ and @.@ textually: +-- On a path that contains no symlinks, all three modes produce the +-- same result (both examples below go through 'canonicalizePath', +-- which expands any symlinks in the base): -- -- >>> tmp <- getTemporaryDirectory --- >>> r <- realPath noSymlinks (tmp "a" ".." "b") --- >>> r == tmp "b" --- True --- --- Handles @.@ segments: --- --- >>> r <- realPath noSymlinks (tmp "." "x") --- >>> r == tmp "x" +-- >>> r1 <- realPath (resolveSymlinks OriginalParents) (tmp "a" ".." "b") +-- >>> r2 <- realPath id (tmp "b") +-- >>> r1 == r2 -- True -noSymlinks :: RealPathOptions -> RealPathOptions -noSymlinks opts = opts { _expandSymlinks = False } - --- | Resolve @..@ components lexically before expanding symbolic --- links. Corresponds to GNU @realpath -L@ / @--logical@. --- --- Default (without this modifier): @..@ is resolved physically, i.e. --- symlinks in the path are expanded first and @..@ then applies to --- the resolved location. With this modifier, @..@ is applied as text --- first (so @\/link\/..@ becomes @\/@), and any symlinks remaining in --- the surviving components are expanded afterwards. --- --- The two modes give the same result on paths that don't mix @..@ --- with symlinks. They diverge on a path like @\/link\/..@ where --- @\/link@ is a symlink: physical resolution follows the symlink and --- then ascends from its target; logical resolution cancels the --- @link@ and @..@ textually, giving @\/@. -- --- If combined with 'noSymlinks', 'noSymlinks' wins - no symlinks are --- expanded in either phase. +-- 'DontResolve' collapses @..@ and @.@ textually and performs no +-- filesystem resolution (so the base is not canonicalized - the +-- result may differ from 'TargetParents' when the base contains +-- symlinks): -- --- On a path without symlinks, logical mode is equivalent to default --- mode: --- --- >>> tmp <- getTemporaryDirectory --- >>> r <- realPath logical (tmp "a" ".." "b") +-- >>> r <- realPath (resolveSymlinks DontResolve) (tmp "a" ".." "b") -- >>> r == tmp "b" -- True -logical :: RealPathOptions -> RealPathOptions -logical opts = opts { _logicalDots = True } +-- >>> r <- realPath (resolveSymlinks DontResolve) (tmp "." "x") +-- >>> r == tmp "x" +-- True +resolveSymlinks :: SymlinkResolution -> RealPathOptions -> RealPathOptions +resolveSymlinks mode opts = opts { _symlinkResolution = mode } -- | Return the canonical path relative to the given base directory. -- Corresponds to GNU @realpath --relative-to=DIR@. @@ -246,20 +250,21 @@ lexicalCollapse p = (root, rest) = if absolute then case parts of - (r:xs) -> (Just r, xs) - [] -> (Nothing, []) + (r:xs) -> (Just r, xs) + [] -> (Nothing, []) else (Nothing, parts) - step acc "." = acc + step acc "." = acc step acc ".." = case acc of - -- Relative path: preserve leading .. - [] -> if absolute then [] else [".."] - (x:xs) | x == ".." -> "..":x:xs - | otherwise -> xs - step acc x = x : acc + -- Relative path: preserve leading .. + [] -> if absolute then [] else [".."] + (x:xs) + | x == ".." -> "..":x:xs + | otherwise -> xs + step acc x = x : acc collapsed = reverse (foldl step [] rest) in case root of - Just r -> joinPath (r : collapsed) - Nothing -> if null collapsed then "." else joinPath collapsed + Just r -> joinPath (r : collapsed) + Nothing -> if null collapsed then "." else joinPath collapsed -- | Resolve a path to its canonical form. -- Corresponds to the shell @realpath@ command. @@ -287,18 +292,13 @@ realPath modifier path = do exists <- doesPathExist path when (not exists) $ ioError (userError ("realPath: path does not exist: " ++ path)) - -- Three modes, in precedence order: - -- 1. noSymlinks: purely lexical (wins over logical if both set). - -- 2. logical: lexically collapse .., then expand symlinks. - -- 3. physical (default): canonicalizePath does everything. - resolved <- - if not (_expandSymlinks opts) - then fmap lexicalCollapse (makeAbsolute path) - else if _logicalDots opts - then fmap lexicalCollapse (makeAbsolute path) >>= canonicalizePath - else canonicalizePath path + resolved <- case _symlinkResolution opts of + TargetParents -> canonicalizePath path + OriginalParents -> + fmap lexicalCollapse (makeAbsolute path) >>= canonicalizePath + DontResolve -> fmap lexicalCollapse (makeAbsolute path) case _relativeBase opts of - Nothing -> return resolved + Nothing -> return resolved Just base -> do canonicalBase <- canonicalizePath base return (makeRelative canonicalBase resolved) From 85f50eb5d6ee73ed4179a7c8bb360395128f8370 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Mon, 20 Apr 2026 20:14:27 +0530 Subject: [PATCH 07/12] Fix and unify existence checks, align with GNU realpath --- src/Streamly/Coreutils/RealPath.hs | 187 +++++++++++++++++++++-------- 1 file changed, 134 insertions(+), 53 deletions(-) diff --git a/src/Streamly/Coreutils/RealPath.hs b/src/Streamly/Coreutils/RealPath.hs index 7feaec96..d1ebbda8 100644 --- a/src/Streamly/Coreutils/RealPath.hs +++ b/src/Streamly/Coreutils/RealPath.hs @@ -13,7 +13,9 @@ -- Call 'realPath' with @id@ for the default behavior, or compose -- modifiers with @(.)@ to customize: -- --- * 'pathMustExist' - require the path to exist (GNU @realpath -e@). +-- * 'requireExistence' - control which path components must exist on +-- disk: 'AllParents' (default, GNU @-E@), 'EntirePath' (GNU @-e@), +-- or 'DontRequire' (GNU @-m@). -- * 'resolveSymlinks' - control when (and whether) symbolic links -- are expanded: 'TargetParents' (default, GNU @-P@), 'OriginalParents' -- (GNU @-L@), or 'DontResolve' (GNU @-s@). @@ -33,19 +35,21 @@ -- * 'relativeTo' falls back to returning the canonicalized absolute -- path unchanged when no common prefix exists with the base -- (e.g. different drives on Windows). --- * 'resolveSymlinks' 'DontResolve' is purely lexical - no filesystem --- access is made on the path itself (only --- 'System.Directory.getCurrentDirectory' when the path is relative). --- It does not check whether the path exists unless combined with --- 'pathMustExist'. Because it's lexical, it can give a different --- result than the default mode when the path traverses through a --- symlink via @..@: @\/link\/..@ lexically resolves to @\/@, but --- physically resolves to the parent of the symlink's target. +-- * 'resolveSymlinks' 'DontResolve' combined with 'requireExistence' +-- 'DontRequire' is the only configuration that performs no +-- filesystem access on the path components (only +-- 'System.Directory.getCurrentDirectory' when the path is +-- relative). All other configurations involve some filesystem IO. +-- * Because 'DontResolve' is lexical, it can give a different result +-- than the default mode when the path traverses through a symlink +-- via @..@: @\/link\/..@ lexically resolves to @\/@, but physically +-- resolves to the parent of the symlink's target. module Streamly.Coreutils.RealPath ( RealPathOptions + , ExistenceCheck (..) , SymlinkResolution (..) - , pathMustExist + , requireExistence , resolveSymlinks , relativeTo , realPath @@ -54,22 +58,22 @@ where import Control.Monad (when) import System.Directory - (canonicalizePath, doesPathExist, makeAbsolute) + (canonicalizePath, doesDirectoryExist, doesPathExist, makeAbsolute) import System.FilePath - (makeRelative, splitDirectories, joinPath, isAbsolute) + (makeRelative, splitDirectories, joinPath, isAbsolute, takeDirectory) -- import System.IO.Error (ioError, userError) -- $setup -- >>> import Control.Exception (try, SomeException) --- >>> import System.Directory (getCurrentDirectory, getTemporaryDirectory) +-- >>> import System.Directory (canonicalizePath, getCurrentDirectory, getTemporaryDirectory) -- >>> import System.FilePath ((), isAbsolute) -- = Design notes -- -- * Thin wrapper over 'System.Directory.canonicalizePath' for the --- default 'TargetParents' (physical) mode; 'OriginalParents' and 'DontResolve' --- use 'makeAbsolute' plus a custom @..@-collapsing walker --- ('lexicalCollapse'). +-- default 'TargetParents' (physical) mode; 'OriginalParents' and +-- 'DontResolve' use 'makeAbsolute' plus a custom @..@-collapsing +-- walker ('lexicalCollapse'). -- -- * Why a single 'SymlinkResolution' enum instead of two flags. -- Symlink expansion is a three-way choice, not two orthogonal @@ -77,7 +81,9 @@ import System.FilePath -- as separate modifiers, which required a precedence rule for -- @logical . noSymlinks@ ('noSymlinks' won). Collapsing to one -- enum makes the choice exclusive by construction - no precedence --- rule needed, no way to express contradictory combinations. +-- rule needed, no way to express contradictory combinations. The +-- same reasoning applies to 'ExistenceCheck': three mutually +-- exclusive modes are one enum, not two flags. -- -- * Why a custom walker for the lexical modes. We initially tried -- 'System.FilePath.normalise', but its documentation is explicit: @@ -90,7 +96,26 @@ import System.FilePath -- -- * 'canonicalizePath' diverges from GNU @realpath@ on nonexistent -- paths: it canonicalizes as much as it can rather than failing. --- 'pathMustExist' restores the GNU default via a pre-check. +-- The 'ExistenceCheck' pre-check restores GNU-compatible behavior +-- by rejecting missing paths before we call 'canonicalizePath'. +-- +-- * Default 'ExistenceCheck' is 'AllParents', matching GNU @-E@. +-- This is a genuine behavior change from a pre-release iteration +-- that defaulted to \"accept anything\" - we chose GNU +-- compatibility as the cost of being slightly less permissive by +-- default. +-- +-- * 'AllParents' is implemented via @doesDirectoryExist@ on +-- 'takeDirectory' of the path. If the immediate parent directory +-- exists then every intermediate ancestor must too (by +-- transitivity of directory existence), so a single check covers +-- the GNU @-E@ requirement. Edge cases handled by 'takeDirectory': +-- bare filenames give @"."@ (always exists), @\/@ gives @\/@ +-- (always exists), so these pass without special-casing. +-- +-- * 'EntirePath' uses 'doesPathExist' rather than +-- 'doesDirectoryExist' so that files (not just directories) at the +-- leaf are accepted. -- -- * 'OriginalParents' is implemented as -- @canonicalizePath . lexicalCollapse . makeAbsolute@: collapse @@ -100,9 +125,9 @@ import System.FilePath -- symlinks /inside/ a symlink's target are still resolved -- physically - @-L@ only governs @..@ in the input path. -- --- * 'pathMustExist' composes with any 'SymlinkResolution': existence --- is checked on the path as given, not on the expanded form, --- matching GNU @realpath -e -s@ and @realpath -e -L@. +-- * 'ExistenceCheck' is checked on the path as given, before any +-- symlink resolution or @..@ collapsing. This matches GNU +-- @realpath -e -s@ and @realpath -e -L@. -- -- * 'relativeTo' always canonicalizes the base physically (following -- symlinks) regardless of the 'SymlinkResolution' mode. Otherwise @@ -114,6 +139,25 @@ import System.FilePath -- failure is an exceptional condition, not a lookup miss - matches -- the error-handling guidance in the package design notes. +-- | Which components of a path must exist on disk for 'realPath' to +-- succeed. +-- +-- * 'EntirePath': every component - including the leaf - must exist. +-- Matches GNU @realpath -e@ / @--canonicalize-existing@. +-- * 'AllParents': every ancestor directory must exist, but the leaf +-- component may be missing. Matches GNU @realpath -E@ / +-- @--canonicalize@, the default. This is useful for paths that +-- name something you're about to create, like the destination of +-- a copy. +-- * 'DontRequire': no component needs to exist. The result is +-- canonicalized as far as the existing prefix allows and the rest +-- is appended as-is. Matches GNU @realpath -m@ / +-- @--canonicalize-missing@. +data ExistenceCheck + = EntirePath + | AllParents + | DontRequire + -- | How @..@ and symbolic links interact when resolving a path. -- The three modes differ on where a @..@ segment points when it -- follows a symlink, and on whether symlinks are expanded at all. @@ -145,7 +189,7 @@ data SymlinkResolution -- directly - instead, pass @id@ for the default behavior, or a -- modifier (or composition of modifiers with @(.)@) to 'realPath'. data RealPathOptions = RealPathOptions - { _requireExistence :: Bool + { _existenceCheck :: ExistenceCheck , _symlinkResolution :: SymlinkResolution , _relativeBase :: Maybe FilePath } @@ -155,33 +199,54 @@ data RealPathOptions = RealPathOptions -- rather than referring to this directly. defaultConfig :: RealPathOptions defaultConfig = RealPathOptions - { _requireExistence = False + { _existenceCheck = AllParents , _symlinkResolution = TargetParents , _relativeBase = Nothing } --- | Require that the path exists. Corresponds to GNU @realpath -e@. --- Throws 'IOError' if the path does not exist. +-- | Set which components of a path must exist. See 'ExistenceCheck' +-- for the three modes and a full explanation. -- --- Default (without this modifier): the path does not need to exist; --- nonexistent trailing components are preserved in the result. +-- Default (without this modifier): 'AllParents' - every ancestor +-- directory must exist, but the leaf may be missing (GNU +-- @realpath -E@). -- --- Succeeds on an existing path (result is canonicalized and is --- idempotent under another 'realPath'): +-- 'EntirePath' rejects a path whose leaf does not exist: -- -- >>> cwd <- getCurrentDirectory --- >>> r1 <- realPath pathMustExist cwd --- >>> r2 <- realPath pathMustExist r1 +-- >>> r1 <- realPath (requireExistence EntirePath) cwd +-- >>> r2 <- realPath (requireExistence EntirePath) r1 +-- >>> r1 == r2 +-- True +-- +-- >>> result <- try (realPath (requireExistence EntirePath) "/definitely/does/not/exist/xyzzy") :: IO (Either SomeException FilePath) +-- >>> either (const True) (const False) result +-- True +-- +-- 'AllParents' (the default) accepts a missing leaf as long as the +-- parent directory exists. Comparing against 'canonicalizePath' of +-- the same input (which has the same symlink-expansion behavior on +-- the existing prefix): +-- +-- >>> tmp <- getTemporaryDirectory +-- >>> r1 <- realPath id (tmp "missing-leaf") +-- >>> r2 <- canonicalizePath (tmp "missing-leaf") -- >>> r1 == r2 -- True -- --- Throws on a nonexistent path: +-- 'AllParents' rejects a path whose parent does not exist: -- --- >>> result <- try (realPath pathMustExist "/definitely/does/not/exist/xyzzy") :: IO (Either SomeException FilePath) +-- >>> result <- try (realPath id "/definitely/does/not/exist/child") :: IO (Either SomeException FilePath) -- >>> either (const True) (const False) result -- True -pathMustExist :: RealPathOptions -> RealPathOptions -pathMustExist opts = opts { _requireExistence = True } +-- +-- 'DontRequire' accepts any path, existent or not: +-- +-- >>> r <- realPath (requireExistence DontRequire) "/definitely/does/not/exist/child" +-- >>> null r +-- False +requireExistence :: ExistenceCheck -> RealPathOptions -> RealPathOptions +requireExistence check opts = opts { _existenceCheck = check } -- | Choose how @..@ and symbolic links interact. See -- 'SymlinkResolution' for the three modes and a full explanation. @@ -189,28 +254,28 @@ pathMustExist opts = opts { _requireExistence = True } -- Default (without this modifier): 'TargetParents' - @..@ ascends -- from the symlink's target (GNU @realpath@'s physical mode, @-P@). -- --- 'DontResolve' does not check whether the path exists unless --- combined with 'pathMustExist'. --- --- On a path that contains no symlinks, all three modes produce the --- same result (both examples below go through 'canonicalizePath', --- which expands any symlinks in the base): +-- The examples below compose with @'requireExistence' 'DontRequire'@ +-- so that the @..@ component in the test path doesn't trigger a +-- parent-existence failure. On a path that contains no symlinks, all +-- three modes produce the same result (both examples below go +-- through 'canonicalizePath', which expands any symlinks in the +-- base): -- -- >>> tmp <- getTemporaryDirectory --- >>> r1 <- realPath (resolveSymlinks OriginalParents) (tmp "a" ".." "b") --- >>> r2 <- realPath id (tmp "b") +-- >>> let opts m = resolveSymlinks m . requireExistence DontRequire +-- >>> r1 <- realPath (opts OriginalParents) (tmp "a" ".." "b") +-- >>> r2 <- realPath (requireExistence DontRequire) (tmp "b") -- >>> r1 == r2 -- True -- -- 'DontResolve' collapses @..@ and @.@ textually and performs no --- filesystem resolution (so the base is not canonicalized - the --- result may differ from 'TargetParents' when the base contains --- symlinks): +-- symlink resolution (so the base is not canonicalized - the result +-- may differ from 'TargetParents' when the base contains symlinks): -- --- >>> r <- realPath (resolveSymlinks DontResolve) (tmp "a" ".." "b") +-- >>> r <- realPath (opts DontResolve) (tmp "a" ".." "b") -- >>> r == tmp "b" -- True --- >>> r <- realPath (resolveSymlinks DontResolve) (tmp "." "x") +-- >>> r <- realPath (opts DontResolve) (tmp "." "x") -- >>> r == tmp "x" -- True resolveSymlinks :: SymlinkResolution -> RealPathOptions -> RealPathOptions @@ -266,6 +331,25 @@ lexicalCollapse p = Just r -> joinPath (r : collapsed) Nothing -> if null collapsed then "." else joinPath collapsed +-- Perform the pre-resolution existence check demanded by the given +-- 'ExistenceCheck'. Throws 'IOError' on violation. +checkExistence :: ExistenceCheck -> FilePath -> IO () +checkExistence check path = case check of + DontRequire -> return () + EntirePath -> do + exists <- doesPathExist path + when (not exists) $ + ioError + (userError ("realPath: path does not exist: " ++ path)) + AllParents -> do + let parent = takeDirectory path + parentExists <- doesDirectoryExist parent + when (not parentExists) $ + ioError + (userError + ("realPath: parent directory does not exist: " + ++ parent)) + -- | Resolve a path to its canonical form. -- Corresponds to the shell @realpath@ command. -- @@ -274,7 +358,7 @@ lexicalCollapse p = -- documents the default that applies in its absence. -- -- Throws 'IOError' if the path cannot be canonicalized, or - when --- 'pathMustExist' is set - if the path does not exist. +-- 'requireExistence' demands - if required components do not exist. -- -- The default-mode result on an existing directory is absolute: -- @@ -288,10 +372,7 @@ realPath -> IO FilePath realPath modifier path = do let opts = modifier defaultConfig - when (_requireExistence opts) $ do - exists <- doesPathExist path - when (not exists) $ - ioError (userError ("realPath: path does not exist: " ++ path)) + checkExistence (_existenceCheck opts) path resolved <- case _symlinkResolution opts of TargetParents -> canonicalizePath path OriginalParents -> From ffb6f3467d8540431ef4651c3f2946c341bc2905 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Mon, 20 Apr 2026 20:54:31 +0530 Subject: [PATCH 08/12] Add the relativeIfWithin option --- src/Streamly/Coreutils/RealPath.hs | 82 +++++++++++++++++++++++++++--- 1 file changed, 75 insertions(+), 7 deletions(-) diff --git a/src/Streamly/Coreutils/RealPath.hs b/src/Streamly/Coreutils/RealPath.hs index d1ebbda8..a9ed789f 100644 --- a/src/Streamly/Coreutils/RealPath.hs +++ b/src/Streamly/Coreutils/RealPath.hs @@ -21,6 +21,9 @@ -- (GNU @-L@), or 'DontResolve' (GNU @-s@). -- * 'relativeTo' - produce a path relative to a given base directory -- (GNU @realpath --relative-to=DIR@). +-- * 'relativeIfWithin' - produce a relative path only if it's under +-- a given directory, otherwise absolute (GNU +-- @realpath --relative-base=DIR@). -- -- Each modifier's Haddock describes the default that applies in its -- absence. @@ -52,11 +55,13 @@ module Streamly.Coreutils.RealPath , requireExistence , resolveSymlinks , relativeTo + , relativeIfWithin , realPath ) where import Control.Monad (when) +import Data.List (isPrefixOf) import System.Directory (canonicalizePath, doesDirectoryExist, doesPathExist, makeAbsolute) import System.FilePath @@ -135,6 +140,14 @@ import System.FilePath -- surprising results. If a future use case needs a lexical base, -- add a separate modifier rather than overloading this one. -- +-- * 'relativeIfWithin' composes with 'relativeTo' as two independent +-- concerns: 'relativeTo' chooses the target, 'relativeIfWithin' +-- gates whether relativization fires. When 'relativeIfWithin' is +-- set alone, its directory serves both roles (matching GNU +-- @--relative-base=DIR@ without @--relative-to@). Containment is +-- tested component-wise via 'splitDirectories' so that partial +-- name prefixes (@\/foo@ vs @\/foobar@) don't register as matches. +-- -- * Throws 'IOError' rather than returning 'Maybe'. A canonicalization -- failure is an exceptional condition, not a lookup miss - matches -- the error-handling guidance in the package design notes. @@ -191,7 +204,8 @@ data SymlinkResolution data RealPathOptions = RealPathOptions { _existenceCheck :: ExistenceCheck , _symlinkResolution :: SymlinkResolution - , _relativeBase :: Maybe FilePath + , _relativeTo :: Maybe FilePath + , _relativeIfWithin :: Maybe FilePath } -- Default configuration: the seed value that modifiers are composed @@ -201,7 +215,8 @@ defaultConfig :: RealPathOptions defaultConfig = RealPathOptions { _existenceCheck = AllParents , _symlinkResolution = TargetParents - , _relativeBase = Nothing + , _relativeTo = Nothing + , _relativeIfWithin = Nothing } -- | Set which components of a path must exist. See 'ExistenceCheck' @@ -300,7 +315,36 @@ resolveSymlinks mode opts = opts { _symlinkResolution = mode } -- >>> realPath (relativeTo cwd) cwd -- "." relativeTo :: FilePath -> RealPathOptions -> RealPathOptions -relativeTo base opts = opts { _relativeBase = Just base } +relativeTo base opts = opts { _relativeTo = Just base } + +-- | Return a relative path only when the resolved path lies within +-- the given directory; otherwise return an absolute path. +-- Corresponds to GNU @realpath --relative-base=DIR@. +-- +-- Default (without this modifier): no containment check is applied - +-- if 'relativeTo' is set, the result is always relative (possibly +-- with @..@ segments); if not, the result is absolute. +-- +-- When composed with 'relativeTo', the 'relativeTo' directory is +-- used as the relativization target and this modifier's directory +-- is used as the containment boundary. When 'relativeTo' is not set, +-- this modifier's directory serves both roles. +-- +-- Inside the boundary, the path is relativized: +-- +-- >>> tmp <- getTemporaryDirectory +-- >>> let child = tmp "missing-leaf" +-- >>> realPath (relativeIfWithin tmp) child +-- "missing-leaf" +-- +-- Outside the boundary, the absolute path is returned unchanged: +-- +-- >>> r1 <- realPath (relativeIfWithin tmp) "/" +-- >>> r2 <- canonicalizePath "/" +-- >>> r1 == r2 +-- True +relativeIfWithin :: FilePath -> RealPathOptions -> RealPathOptions +relativeIfWithin dir opts = opts { _relativeIfWithin = Just dir } -- Collapse @.@ and @..@ segments lexically. On absolute paths, @..@ -- at the root is dropped (you can't ascend above @\/@). On relative @@ -350,6 +394,14 @@ checkExistence check path = case check of ("realPath: parent directory does not exist: " ++ parent)) +-- Is the second path a descendant of (or equal to) the first? +-- Both arguments should already be canonicalized and absolute. +-- Uses component-wise comparison via 'splitDirectories' so that +-- partial-name matches (e.g. @\/foo@ vs @\/foobar@) don't register +-- as containment. +isPathUnder :: FilePath -> FilePath -> Bool +isPathUnder dir p = splitDirectories dir `isPrefixOf` splitDirectories p + -- | Resolve a path to its canonical form. -- Corresponds to the shell @realpath@ command. -- @@ -378,8 +430,24 @@ realPath modifier path = do OriginalParents -> fmap lexicalCollapse (makeAbsolute path) >>= canonicalizePath DontResolve -> fmap lexicalCollapse (makeAbsolute path) - case _relativeBase opts of + -- Relativization and containment logic: + -- * _relativeTo chooses the target to relativize against. + -- * _relativeIfWithin gates whether relativization fires: if + -- the resolved path is not under the boundary, we return the + -- absolute result instead. + -- * When _relativeIfWithin is set alone (no _relativeTo), its + -- directory serves both roles. + let target = case _relativeTo opts of + Just t -> Just t + Nothing -> _relativeIfWithin opts + case target of Nothing -> return resolved - Just base -> do - canonicalBase <- canonicalizePath base - return (makeRelative canonicalBase resolved) + Just t -> do + canonicalTarget <- canonicalizePath t + case _relativeIfWithin opts of + Nothing -> return (makeRelative canonicalTarget resolved) + Just boundary -> do + canonicalBoundary <- canonicalizePath boundary + if isPathUnder canonicalBoundary resolved + then return (makeRelative canonicalTarget resolved) + else return resolved From a6ae3c778dc262ecce768408ccc45eab451072da Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Mon, 20 Apr 2026 22:11:25 +0530 Subject: [PATCH 09/12] Rename TargetParents to avoid confusion --- src/Streamly/Coreutils/RealPath.hs | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/Streamly/Coreutils/RealPath.hs b/src/Streamly/Coreutils/RealPath.hs index a9ed789f..c4f52ece 100644 --- a/src/Streamly/Coreutils/RealPath.hs +++ b/src/Streamly/Coreutils/RealPath.hs @@ -17,7 +17,7 @@ -- disk: 'AllParents' (default, GNU @-E@), 'EntirePath' (GNU @-e@), -- or 'DontRequire' (GNU @-m@). -- * 'resolveSymlinks' - control when (and whether) symbolic links --- are expanded: 'TargetParents' (default, GNU @-P@), 'OriginalParents' +-- are expanded: 'UseTargetParents' (default, GNU @-P@), 'UseOriginalParents' -- (GNU @-L@), or 'DontResolve' (GNU @-s@). -- * 'relativeTo' - produce a path relative to a given base directory -- (GNU @realpath --relative-to=DIR@). @@ -76,7 +76,7 @@ import System.FilePath -- = Design notes -- -- * Thin wrapper over 'System.Directory.canonicalizePath' for the --- default 'TargetParents' (physical) mode; 'OriginalParents' and +-- default 'UseTargetParents' (physical) mode; 'UseOriginalParents' and -- 'DontResolve' use 'makeAbsolute' plus a custom @..@-collapsing -- walker ('lexicalCollapse'). -- @@ -122,7 +122,7 @@ import System.FilePath -- 'doesDirectoryExist' so that files (not just directories) at the -- leaf are accepted. -- --- * 'OriginalParents' is implemented as +-- * 'UseOriginalParents' is implemented as -- @canonicalizePath . lexicalCollapse . makeAbsolute@: collapse -- @..@ as text first, then let 'canonicalizePath' expand whatever -- symlinks remain in the surviving components. This matches GNU @@ -175,27 +175,27 @@ data ExistenceCheck -- The three modes differ on where a @..@ segment points when it -- follows a symlink, and on whether symlinks are expanded at all. -- --- * 'TargetParents': @..@ means the parent of the symlink's +-- * 'UseTargetParents': @..@ means the parent of the symlink's -- /target/. Symlinks are expanded first, so @..@ ascends from the -- resolved location. Matches GNU @realpath@'s default physical -- mode (@-P@). --- * 'OriginalParents': @..@ means the parent in the /original/ path +-- * 'UseOriginalParents': @..@ means the parent in the /original/ path -- you supplied - @..@ textually cancels the preceding segment, -- regardless of whether that segment was a symlink. Remaining -- symlinks in the surviving path are still expanded. Matches GNU -- @realpath -L@ / @--logical@. -- * 'DontResolve': no symlinks are expanded anywhere in the path. --- @..@ is lexical (same as 'OriginalParents'), and symlinks in +-- @..@ is lexical (same as 'UseOriginalParents'), and symlinks in -- other components are preserved as-is. Matches GNU @realpath -s@ -- / @--no-symlinks@. -- -- The three modes produce the same result on paths that contain no --- symlinks. 'TargetParents' and 'OriginalParents' diverge when a +-- symlinks. 'UseTargetParents' and 'UseOriginalParents' diverge when a -- symlink is followed by @..@; 'DontResolve' diverges from both -- whenever the path contains any symlink. data SymlinkResolution - = TargetParents - | OriginalParents + = UseTargetParents + | UseOriginalParents | DontResolve -- | Options for 'realPath'. Users don't construct 'RealPathOptions' @@ -214,7 +214,7 @@ data RealPathOptions = RealPathOptions defaultConfig :: RealPathOptions defaultConfig = RealPathOptions { _existenceCheck = AllParents - , _symlinkResolution = TargetParents + , _symlinkResolution = UseTargetParents , _relativeTo = Nothing , _relativeIfWithin = Nothing } @@ -266,7 +266,7 @@ requireExistence check opts = opts { _existenceCheck = check } -- | Choose how @..@ and symbolic links interact. See -- 'SymlinkResolution' for the three modes and a full explanation. -- --- Default (without this modifier): 'TargetParents' - @..@ ascends +-- Default (without this modifier): 'UseTargetParents' - @..@ ascends -- from the symlink's target (GNU @realpath@'s physical mode, @-P@). -- -- The examples below compose with @'requireExistence' 'DontRequire'@ @@ -278,14 +278,14 @@ requireExistence check opts = opts { _existenceCheck = check } -- -- >>> tmp <- getTemporaryDirectory -- >>> let opts m = resolveSymlinks m . requireExistence DontRequire --- >>> r1 <- realPath (opts OriginalParents) (tmp "a" ".." "b") +-- >>> r1 <- realPath (opts UseOriginalParents) (tmp "a" ".." "b") -- >>> r2 <- realPath (requireExistence DontRequire) (tmp "b") -- >>> r1 == r2 -- True -- -- 'DontResolve' collapses @..@ and @.@ textually and performs no -- symlink resolution (so the base is not canonicalized - the result --- may differ from 'TargetParents' when the base contains symlinks): +-- may differ from 'UseTargetParents' when the base contains symlinks): -- -- >>> r <- realPath (opts DontResolve) (tmp "a" ".." "b") -- >>> r == tmp "b" @@ -426,8 +426,8 @@ realPath modifier path = do let opts = modifier defaultConfig checkExistence (_existenceCheck opts) path resolved <- case _symlinkResolution opts of - TargetParents -> canonicalizePath path - OriginalParents -> + UseTargetParents -> canonicalizePath path + UseOriginalParents -> fmap lexicalCollapse (makeAbsolute path) >>= canonicalizePath DontResolve -> fmap lexicalCollapse (makeAbsolute path) -- Relativization and containment logic: From a4c9d08c2096d5c6f1a37871e6473d0ab0acd058 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Mon, 20 Apr 2026 22:16:07 +0530 Subject: [PATCH 10/12] Update the README with realpath and id --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 7b6e2100..2e91a9ed 100644 --- a/README.md +++ b/README.md @@ -19,12 +19,12 @@ following coreutils-inspired as well as some additional commands: * Dir modify: `touch`, `ln`, `cp`, `mkdir`, `rm`, `mv` * File stat: `test`, `stat`, `touch` * File read/write: `cp` -* Symlink read: `readlink` -* Processes: `cd`, `pwd`, `sleep` -* Environment: `home` +* Symlinks: `readlink`, `realpath` +* Processes: `cd`, `pwd`, `sleep`, `id` +* UserDB: `id`, `home` * Text Processing: `cut`, `tail` * Shell: streaming composition of shell commands -* Paths: `dirname`, `which` +* Paths: `dirname`, `which`, `realpath`, `home` ## Important API Notice From e67a6caab950f82a842eb570bb457ecf061869f5 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Tue, 21 Apr 2026 00:00:08 +0530 Subject: [PATCH 11/12] Rename module to resolvePath, rename some constr/functions --- .../Coreutils/{RealPath.hs => ResolvePath.hs} | 113 +++++++++--------- streamly-coreutils.cabal | 2 +- 2 files changed, 58 insertions(+), 57 deletions(-) rename src/Streamly/Coreutils/{RealPath.hs => ResolvePath.hs} (82%) diff --git a/src/Streamly/Coreutils/RealPath.hs b/src/Streamly/Coreutils/ResolvePath.hs similarity index 82% rename from src/Streamly/Coreutils/RealPath.hs rename to src/Streamly/Coreutils/ResolvePath.hs index c4f52ece..1ad6c7da 100644 --- a/src/Streamly/Coreutils/RealPath.hs +++ b/src/Streamly/Coreutils/ResolvePath.hs @@ -1,5 +1,5 @@ -- | --- Module : Streamly.Coreutils.RealPath +-- Module : Streamly.Coreutils.ResolvePath -- Copyright : (c) 2022 Composewell Technologies -- License : BSD-3-Clause -- Maintainer : streamly@composewell.com @@ -10,15 +10,15 @@ -- @.@ and @..@ segments, and follow every symbolic link along the way. -- Corresponds to the shell @realpath@ command. -- --- Call 'realPath' with @id@ for the default behavior, or compose +-- Call 'resolvePath' with @id@ for the default behavior, or compose -- modifiers with @(.)@ to customize: -- -- * 'requireExistence' - control which path components must exist on -- disk: 'AllParents' (default, GNU @-E@), 'EntirePath' (GNU @-e@), -- or 'DontRequire' (GNU @-m@). --- * 'resolveSymlinks' - control when (and whether) symbolic links +-- * 'resolutionMode' - control when (and whether) symbolic links -- are expanded: 'UseTargetParents' (default, GNU @-P@), 'UseOriginalParents' --- (GNU @-L@), or 'DontResolve' (GNU @-s@). +-- (GNU @-L@), or 'DontResolveSymlinks' (GNU @-s@). -- * 'relativeTo' - produce a path relative to a given base directory -- (GNU @realpath --relative-to=DIR@). -- * 'relativeIfWithin' - produce a relative path only if it's under @@ -38,25 +38,25 @@ -- * 'relativeTo' falls back to returning the canonicalized absolute -- path unchanged when no common prefix exists with the base -- (e.g. different drives on Windows). --- * 'resolveSymlinks' 'DontResolve' combined with 'requireExistence' +-- * 'resolutionMode' 'DontResolveSymlinks' combined with 'requireExistence' -- 'DontRequire' is the only configuration that performs no -- filesystem access on the path components (only -- 'System.Directory.getCurrentDirectory' when the path is -- relative). All other configurations involve some filesystem IO. --- * Because 'DontResolve' is lexical, it can give a different result +-- * Because 'DontResolveSymlinks' is lexical, it can give a different result -- than the default mode when the path traverses through a symlink -- via @..@: @\/link\/..@ lexically resolves to @\/@, but physically -- resolves to the parent of the symlink's target. -module Streamly.Coreutils.RealPath - ( RealPathOptions +module Streamly.Coreutils.ResolvePath + ( ResolvePathOptions , ExistenceCheck (..) - , SymlinkResolution (..) + , ResolutionMode (..) , requireExistence - , resolveSymlinks + , resolutionMode , relativeTo , relativeIfWithin - , realPath + , resolvePath ) where @@ -77,10 +77,10 @@ import System.FilePath -- -- * Thin wrapper over 'System.Directory.canonicalizePath' for the -- default 'UseTargetParents' (physical) mode; 'UseOriginalParents' and --- 'DontResolve' use 'makeAbsolute' plus a custom @..@-collapsing +-- 'DontResolveSymlinks' use 'makeAbsolute' plus a custom @..@-collapsing -- walker ('lexicalCollapse'). -- --- * Why a single 'SymlinkResolution' enum instead of two flags. +-- * Why a single 'ResolutionMode' enum instead of two flags. -- Symlink expansion is a three-way choice, not two orthogonal -- booleans. An earlier iteration exposed 'logical' and 'noSymlinks' -- as separate modifiers, which required a precedence rule for @@ -135,7 +135,7 @@ import System.FilePath -- @realpath -e -s@ and @realpath -e -L@. -- -- * 'relativeTo' always canonicalizes the base physically (following --- symlinks) regardless of the 'SymlinkResolution' mode. Otherwise +-- symlinks) regardless of the 'ResolutionMode' mode. Otherwise -- @relativeTo "foo/../bar"@ with a lexical base would give -- surprising results. If a future use case needs a lexical base, -- add a separate modifier rather than overloading this one. @@ -152,7 +152,7 @@ import System.FilePath -- failure is an exceptional condition, not a lookup miss - matches -- the error-handling guidance in the package design notes. --- | Which components of a path must exist on disk for 'realPath' to +-- | Which components of a path must exist on disk for 'resolvePath' to -- succeed. -- -- * 'EntirePath': every component - including the leaf - must exist. @@ -184,26 +184,26 @@ data ExistenceCheck -- regardless of whether that segment was a symlink. Remaining -- symlinks in the surviving path are still expanded. Matches GNU -- @realpath -L@ / @--logical@. --- * 'DontResolve': no symlinks are expanded anywhere in the path. +-- * 'DontResolveSymlinks': no symlinks are expanded anywhere in the path. -- @..@ is lexical (same as 'UseOriginalParents'), and symlinks in -- other components are preserved as-is. Matches GNU @realpath -s@ -- / @--no-symlinks@. -- -- The three modes produce the same result on paths that contain no -- symlinks. 'UseTargetParents' and 'UseOriginalParents' diverge when a --- symlink is followed by @..@; 'DontResolve' diverges from both +-- symlink is followed by @..@; 'DontResolveSymlinks' diverges from both -- whenever the path contains any symlink. -data SymlinkResolution +data ResolutionMode = UseTargetParents | UseOriginalParents - | DontResolve + | DontResolveSymlinks --- | Options for 'realPath'. Users don't construct 'RealPathOptions' +-- | Options for 'resolvePath'. Users don't construct 'ResolvePathOptions' -- directly - instead, pass @id@ for the default behavior, or a --- modifier (or composition of modifiers with @(.)@) to 'realPath'. -data RealPathOptions = RealPathOptions +-- modifier (or composition of modifiers with @(.)@) to 'resolvePath'. +data ResolvePathOptions = ResolvePathOptions { _existenceCheck :: ExistenceCheck - , _symlinkResolution :: SymlinkResolution + , _resolutionMode :: ResolutionMode , _relativeTo :: Maybe FilePath , _relativeIfWithin :: Maybe FilePath } @@ -211,10 +211,10 @@ data RealPathOptions = RealPathOptions -- Default configuration: the seed value that modifiers are composed -- onto. Users supply @id@ (or a modifier chain) at the call site -- rather than referring to this directly. -defaultConfig :: RealPathOptions -defaultConfig = RealPathOptions +defaultConfig :: ResolvePathOptions +defaultConfig = ResolvePathOptions { _existenceCheck = AllParents - , _symlinkResolution = UseTargetParents + , _resolutionMode = UseTargetParents , _relativeTo = Nothing , _relativeIfWithin = Nothing } @@ -229,12 +229,12 @@ defaultConfig = RealPathOptions -- 'EntirePath' rejects a path whose leaf does not exist: -- -- >>> cwd <- getCurrentDirectory --- >>> r1 <- realPath (requireExistence EntirePath) cwd --- >>> r2 <- realPath (requireExistence EntirePath) r1 +-- >>> r1 <- resolvePath (requireExistence EntirePath) cwd +-- >>> r2 <- resolvePath (requireExistence EntirePath) r1 -- >>> r1 == r2 -- True -- --- >>> result <- try (realPath (requireExistence EntirePath) "/definitely/does/not/exist/xyzzy") :: IO (Either SomeException FilePath) +-- >>> result <- try (resolvePath (requireExistence EntirePath) "/definitely/does/not/exist/xyzzy") :: IO (Either SomeException FilePath) -- >>> either (const True) (const False) result -- True -- @@ -244,27 +244,28 @@ defaultConfig = RealPathOptions -- the existing prefix): -- -- >>> tmp <- getTemporaryDirectory --- >>> r1 <- realPath id (tmp "missing-leaf") +-- >>> r1 <- resolvePath id (tmp "missing-leaf") -- >>> r2 <- canonicalizePath (tmp "missing-leaf") -- >>> r1 == r2 -- True -- -- 'AllParents' rejects a path whose parent does not exist: -- --- >>> result <- try (realPath id "/definitely/does/not/exist/child") :: IO (Either SomeException FilePath) +-- >>> result <- try (resolvePath id "/definitely/does/not/exist/child") :: IO (Either SomeException FilePath) -- >>> either (const True) (const False) result -- True -- -- 'DontRequire' accepts any path, existent or not: -- --- >>> r <- realPath (requireExistence DontRequire) "/definitely/does/not/exist/child" +-- >>> r <- resolvePath (requireExistence DontRequire) "/definitely/does/not/exist/child" -- >>> null r -- False -requireExistence :: ExistenceCheck -> RealPathOptions -> RealPathOptions +requireExistence :: ExistenceCheck -> ResolvePathOptions -> ResolvePathOptions requireExistence check opts = opts { _existenceCheck = check } --- | Choose how @..@ and symbolic links interact. See --- 'SymlinkResolution' for the three modes and a full explanation. +-- | Set the resolution mode - how @..@ segments and symbolic links +-- are handled. See 'ResolutionMode' for the three modes and a full +-- explanation. -- -- Default (without this modifier): 'UseTargetParents' - @..@ ascends -- from the symlink's target (GNU @realpath@'s physical mode, @-P@). @@ -277,24 +278,24 @@ requireExistence check opts = opts { _existenceCheck = check } -- base): -- -- >>> tmp <- getTemporaryDirectory --- >>> let opts m = resolveSymlinks m . requireExistence DontRequire --- >>> r1 <- realPath (opts UseOriginalParents) (tmp "a" ".." "b") --- >>> r2 <- realPath (requireExistence DontRequire) (tmp "b") +-- >>> let opts m = resolutionMode m . requireExistence DontRequire +-- >>> r1 <- resolvePath (opts UseOriginalParents) (tmp "a" ".." "b") +-- >>> r2 <- resolvePath (requireExistence DontRequire) (tmp "b") -- >>> r1 == r2 -- True -- --- 'DontResolve' collapses @..@ and @.@ textually and performs no +-- 'DontResolveSymlinks' collapses @..@ and @.@ textually and performs no -- symlink resolution (so the base is not canonicalized - the result -- may differ from 'UseTargetParents' when the base contains symlinks): -- --- >>> r <- realPath (opts DontResolve) (tmp "a" ".." "b") +-- >>> r <- resolvePath (opts DontResolveSymlinks) (tmp "a" ".." "b") -- >>> r == tmp "b" -- True --- >>> r <- realPath (opts DontResolve) (tmp "." "x") +-- >>> r <- resolvePath (opts DontResolveSymlinks) (tmp "." "x") -- >>> r == tmp "x" -- True -resolveSymlinks :: SymlinkResolution -> RealPathOptions -> RealPathOptions -resolveSymlinks mode opts = opts { _symlinkResolution = mode } +resolutionMode :: ResolutionMode -> ResolvePathOptions -> ResolvePathOptions +resolutionMode mode opts = opts { _resolutionMode = mode } -- | Return the canonical path relative to the given base directory. -- Corresponds to GNU @realpath --relative-to=DIR@. @@ -312,9 +313,9 @@ resolveSymlinks mode opts = opts { _symlinkResolution = mode } -- A path relative to itself is @\".\"@: -- -- >>> cwd <- getCurrentDirectory --- >>> realPath (relativeTo cwd) cwd +-- >>> resolvePath (relativeTo cwd) cwd -- "." -relativeTo :: FilePath -> RealPathOptions -> RealPathOptions +relativeTo :: FilePath -> ResolvePathOptions -> ResolvePathOptions relativeTo base opts = opts { _relativeTo = Just base } -- | Return a relative path only when the resolved path lies within @@ -334,16 +335,16 @@ relativeTo base opts = opts { _relativeTo = Just base } -- -- >>> tmp <- getTemporaryDirectory -- >>> let child = tmp "missing-leaf" --- >>> realPath (relativeIfWithin tmp) child +-- >>> resolvePath (relativeIfWithin tmp) child -- "missing-leaf" -- -- Outside the boundary, the absolute path is returned unchanged: -- --- >>> r1 <- realPath (relativeIfWithin tmp) "/" +-- >>> r1 <- resolvePath (relativeIfWithin tmp) "/" -- >>> r2 <- canonicalizePath "/" -- >>> r1 == r2 -- True -relativeIfWithin :: FilePath -> RealPathOptions -> RealPathOptions +relativeIfWithin :: FilePath -> ResolvePathOptions -> ResolvePathOptions relativeIfWithin dir opts = opts { _relativeIfWithin = Just dir } -- Collapse @.@ and @..@ segments lexically. On absolute paths, @..@ @@ -384,14 +385,14 @@ checkExistence check path = case check of exists <- doesPathExist path when (not exists) $ ioError - (userError ("realPath: path does not exist: " ++ path)) + (userError ("resolvePath: path does not exist: " ++ path)) AllParents -> do let parent = takeDirectory path parentExists <- doesDirectoryExist parent when (not parentExists) $ ioError (userError - ("realPath: parent directory does not exist: " + ("resolvePath: parent directory does not exist: " ++ parent)) -- Is the second path a descendant of (or equal to) the first? @@ -415,21 +416,21 @@ isPathUnder dir p = splitDirectories dir `isPrefixOf` splitDirectories p -- The default-mode result on an existing directory is absolute: -- -- >>> cwd <- getCurrentDirectory --- >>> r <- realPath id cwd +-- >>> r <- resolvePath id cwd -- >>> isAbsolute r -- True -realPath - :: (RealPathOptions -> RealPathOptions) +resolvePath + :: (ResolvePathOptions -> ResolvePathOptions) -> FilePath -> IO FilePath -realPath modifier path = do +resolvePath modifier path = do let opts = modifier defaultConfig checkExistence (_existenceCheck opts) path - resolved <- case _symlinkResolution opts of + resolved <- case _resolutionMode opts of UseTargetParents -> canonicalizePath path UseOriginalParents -> fmap lexicalCollapse (makeAbsolute path) >>= canonicalizePath - DontResolve -> fmap lexicalCollapse (makeAbsolute path) + DontResolveSymlinks -> fmap lexicalCollapse (makeAbsolute path) -- Relativization and containment logic: -- * _relativeTo chooses the target to relativize against. -- * _relativeIfWithin gates whether relativization fires: if diff --git a/streamly-coreutils.cabal b/streamly-coreutils.cabal index 027d7687..05856e08 100644 --- a/streamly-coreutils.cabal +++ b/streamly-coreutils.cabal @@ -130,7 +130,7 @@ library , Streamly.Coreutils.Mkdir , Streamly.Coreutils.Mv , Streamly.Coreutils.ReadLink - , Streamly.Coreutils.RealPath + , Streamly.Coreutils.ResolvePath , Streamly.Coreutils.Rm , Streamly.Coreutils.Sh , Streamly.Coreutils.Sleep From 6872df1f66ca9b36c8c16c3a9f3b2ca61bcc8c5f Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Tue, 21 Apr 2026 00:27:13 +0530 Subject: [PATCH 12/12] Add GNU realpath equivalences --- src/Streamly/Coreutils/ResolvePath.hs | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/Streamly/Coreutils/ResolvePath.hs b/src/Streamly/Coreutils/ResolvePath.hs index 1ad6c7da..8dabdffb 100644 --- a/src/Streamly/Coreutils/ResolvePath.hs +++ b/src/Streamly/Coreutils/ResolvePath.hs @@ -28,6 +28,28 @@ -- Each modifier's Haddock describes the default that applies in its -- absence. -- +-- == GNU @realpath@ equivalences +-- +-- Each binding below corresponds to a common GNU @realpath@ flag +-- combination. +-- +-- Default (GNU @-E -P@, no relative output): +-- +-- >>> _ = resolvePath id -- realpath +-- >>> _ = resolvePath (requireExistence EntirePath) -- realpath -e +-- >>> _ = resolvePath (requireExistence AllParents) -- realpath -E +-- >>> _ = resolvePath (requireExistence DontRequire) -- realpath -m +-- >>> _ = resolvePath (resolutionMode UseTargetParents) -- realpath -P +-- >>> _ = resolvePath (resolutionMode UseOriginalParents) -- realpath -L +-- >>> _ = resolvePath (resolutionMode DontResolveSymlinks) -- realpath -s +-- >>> _ = resolvePath (relativeTo "/usr/bin") -- realpath --relative-to=/usr/bin +-- >>> _ = resolvePath (relativeIfWithin "/usr") -- realpath --relative-base=/usr +-- +-- Composed modifiers: +-- +-- >>> -- realpath --relative-to=/usr/bin --relative-base=/usr +-- >>> _ = resolvePath (relativeTo "/usr/bin" . relativeIfWithin "/usr") +-- -- == Caveats -- -- * On Windows, @subst@ drives are resolved through to their