Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions core/src/Streamly/FileSystem/DirIO.hs
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ module Streamly.FileSystem.DirIO
#else
ReadOptions
, followSymlinks
, ignoreNonExisting
, ignoreLoopErrors
, ignoreInAccessible
, ignoreMissing
, ignoreSymlinkLoops
, ignoreInaccessible
#endif
-- * Streams
, read
Expand Down
2 changes: 1 addition & 1 deletion core/src/Streamly/Internal/FileSystem/Dir.hs
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ pMapUnfoldE = fmap ePathMap . Unfold.lmapM Path.fromString
--
{-# INLINE reader #-}
reader :: (MonadIO m, MonadCatch m) => Unfold m FilePath FilePath
reader = fmap Path.toString $ Unfold.lmapM Path.fromString (DirIO.reader CONF)
reader = fmap Path.toString $ Unfold.lmapM Path.fromString DirIO.reader

-- | Read directories as Left and files as Right. Filter out "." and ".."
-- entries.
Expand Down
21 changes: 6 additions & 15 deletions core/src/Streamly/Internal/FileSystem/DirIO.hs
Original file line number Diff line number Diff line change
Expand Up @@ -81,14 +81,7 @@ module Streamly.Internal.FileSystem.DirIO
-- getMetadata GetMetadata (followSymlinks, noAutoMount - see fstatat)

-- * Configuration
ReadOptions
, defaultReadOptions
#if !defined(mingw32_HOST_OS) && !defined(__MINGW32__)
, followSymlinks
, ignoreNonExisting
, ignoreLoopErrors
, ignoreInAccessible
#endif
module Streamly.Internal.FileSystem.DirOptions

-- * Streams
, read
Expand Down Expand Up @@ -156,18 +149,16 @@ import Streamly.Internal.Data.Unfold.Type (Unfold(..))
import Streamly.Internal.FileSystem.Path (Path)
#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
import qualified Streamly.Internal.Data.Fold as Fold
import Streamly.Internal.FileSystem.Windows.ReadDir
(eitherReader, reader, ReadOptions, defaultReadOptions)
import Streamly.Internal.FileSystem.Windows.ReadDir (eitherReader, reader)
#else
import Streamly.Internal.FileSystem.Posix.ReadDir
( readEitherChunks, eitherReader, reader, ReadOptions, defaultReadOptions
, followSymlinks, ignoreNonExisting, ignoreLoopErrors, ignoreInAccessible
)
( readEitherChunks, eitherReader, reader)
#endif
import qualified Streamly.Internal.Data.Stream as S
import qualified Streamly.Data.Unfold as UF
import qualified Streamly.Internal.FileSystem.Path as Path

import Streamly.Internal.FileSystem.DirOptions
import Prelude hiding (read)

{-
Expand Down Expand Up @@ -340,9 +331,9 @@ dirReader f = fmap (fromLeft undefined) $ UF.filter isLeft (eitherReader f)
--
-- /Pre-release/
{-# INLINE read #-}
read :: (MonadIO m, MonadCatch m) => (ReadOptions -> ReadOptions) ->
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did you remove readoptions here?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it is not used, then why pass it. There is no reason for this operation to have an option.

read :: (MonadIO m, MonadCatch m) =>
Path -> Stream m Path
read f = S.unfold (reader f)
read = S.unfold reader

-- | Read directories as Left and files as Right. Filter out "." and ".."
-- entries. The output contains the names of the directories and files.
Expand Down
125 changes: 125 additions & 0 deletions core/src/Streamly/Internal/FileSystem/DirOptions.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
-- |
-- Module : Streamly.Internal.FileSystem.DirOptions
-- Copyright : (c) 2024 Composewell Technologies
--
-- License : BSD3
-- Maintainer : streamly@composewell.com
-- Portability : GHC

module Streamly.Internal.FileSystem.DirOptions
(
ReadOptions (..)
, followSymlinks
, ignoreMissing
, ignoreSymlinkLoops
, ignoreInaccessible
, defaultReadOptions
)
where

-- NOTE: If we are following symlinks, then we want to determine the type of
-- the link destination not the link itself, so we need to use stat instead of
-- lstat for resolving the symlink.
--
-- For recursive traversal, instead of classifying the dirents using stat, we
-- can leave them unclassified, and deal with ENOTDIR when doing an opendir. We
-- can just ignore that error if it is not a dir. This way we do not need to do
-- stat at all. Or we can basically say don't try to determine the type of
-- symlinks and always try to read symlinks as dirs. We can have an option for
-- classifying symlinks or DT_UNKNOWN as potential dirs.

-- When resolving a symlink we may encounter errors only if the directory entry
-- is a symlink. If the directory entry is not a symlink then stat on it will
-- have permissions, it will not give ELOOP or ENOENT unless the file was
-- deleted or recreated after we read the dirent.

-- | Options controlling the behavior of directory read.
data ReadOptions =
ReadOptions
{ _followSymlinks :: Bool
, _ignoreELOOP :: Bool
, _ignoreENOENT :: Bool
, _ignoreEACCESS :: Bool
}

-- | Control how symbolic links are handled when determining the type
-- of a directory entry.
--
-- * If set to 'True', symbolic links are resolved before classification.
-- This means a symlink pointing to a directory will be treated as a
-- directory, and a symlink pointing to a file will be treated as a
-- non-directory.
--
-- * If set to 'False', all symbolic links are classified as non-directories,
-- without attempting to resolve their targets.
--
-- Enabling resolution may cause additional errors to occur due to
-- insufficient permissions, broken links, or symlink loops. Such errors
-- can be ignored or handled using the appropriate options.
--
-- The default is 'False'.
--
-- On Windows this option has no effect as of now, symlinks are not followed to
-- determine the type.
followSymlinks :: Bool -> ReadOptions -> ReadOptions
followSymlinks x opts = opts {_followSymlinks = x}

-- | When the 'followSymlinks' option is enabled and a directory entry is a
-- symbolic link, we resolve it to determine the type of the symlink target.
-- This option controls the behavior when encountering symlink loop errors
-- during resolution.
--
-- When set to 'True', symlink loop errors are ignored, and the type is
-- reported as not a directory. When set to 'False', the directory read
-- operation fails with an error.
--
-- The default is 'False'.
--
-- On Windows this option has no effect as of now, symlinks are not followed to
-- determine the type.
ignoreSymlinkLoops :: Bool -> ReadOptions -> ReadOptions
ignoreSymlinkLoops x opts = opts {_ignoreELOOP = x}

-- | When the 'followSymlinks' option is enabled and a directory entry is a
-- symbolic link, we resolve it to determine the type of the symlink target.
-- This option controls the behavior when encountering broken symlink errors
-- during resolution.
--
-- When set to 'True', broken symlink errors are ignored, and the type is
-- reported as not a directory. When set to 'False', the directory read
-- operation fails with an error.
--
-- The default is 'False'.
--
-- On Windows this option has no effect as of now, symlinks are not followed to
-- determine the type.
ignoreMissing :: Bool -> ReadOptions -> ReadOptions
ignoreMissing x opts = opts {_ignoreENOENT = x}

-- | When the 'followSymlinks' option is enabled and a directory entry is a
-- symbolic link, we resolve it to determine the type of the symlink target.
-- This option controls the behavior when encountering permission errors
-- during resolution.
--
-- When set to 'True', any permission errors are ignored, and the type is
-- reported as not a directory. When set to 'False', the directory read
-- operation fails with an error.
--
-- The default is 'False'.
--
-- On Windows this option has no effect as of now, symlinks are not followed to
-- determine the type.
ignoreInaccessible :: Bool -> ReadOptions -> ReadOptions
ignoreInaccessible x opts = opts {_ignoreEACCESS = x}

-- XXX find ignores errors when following symlinks, by default.
-- NOTE: The defaultReadOptions emulates the behaviour of "find".
--
defaultReadOptions :: ReadOptions
defaultReadOptions =
ReadOptions
{ _followSymlinks = False
, _ignoreELOOP = False
, _ignoreENOENT = False
, _ignoreEACCESS = False
}
61 changes: 11 additions & 50 deletions core/src/Streamly/Internal/FileSystem/Posix/ReadDir.hsc
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,7 @@
module Streamly.Internal.FileSystem.Posix.ReadDir
(
#if !defined(mingw32_HOST_OS) && !defined(__MINGW32__)
ReadOptions
, followSymlinks
, ignoreNonExisting
, ignoreLoopErrors
, ignoreInAccessible
, defaultReadOptions

, readScanWith_
readScanWith_
, readScanWith
, readPlusScanWith

Expand Down Expand Up @@ -68,6 +61,8 @@ import qualified Streamly.Internal.Data.Unfold as UF (bracketIO)
import qualified Streamly.Internal.FileSystem.Path.Common as PathC
import qualified Streamly.Internal.FileSystem.PosixPath as Path

import Streamly.Internal.FileSystem.DirOptions

#include <dirent.h>
#include <sys/stat.h>

Expand All @@ -79,37 +74,6 @@ data {-# CTYPE "struct stat" #-} CStat

newtype DirStream = DirStream (Ptr CDir)

data ReadOptions =
ReadOptions
{ _followSymlinks :: Bool
, _ignoreSymlinkLoopErrors :: Bool
, _ignoreNonExistingFiles :: Bool
, _ignoreInAccessibleFiles :: Bool
}

followSymlinks :: Bool -> ReadOptions -> ReadOptions
followSymlinks x opts = opts {_followSymlinks = x}

ignoreLoopErrors :: Bool -> ReadOptions -> ReadOptions
ignoreLoopErrors x opts = opts {_ignoreSymlinkLoopErrors = x}

ignoreNonExisting :: Bool -> ReadOptions -> ReadOptions
ignoreNonExisting x opts = opts {_ignoreNonExistingFiles = x}

ignoreInAccessible :: Bool -> ReadOptions -> ReadOptions
ignoreInAccessible x opts = opts {_ignoreInAccessibleFiles = x}

-- NOTE: The defaultReadOptions emulate the behaviour of "find".
--
defaultReadOptions :: ReadOptions
defaultReadOptions =
ReadOptions
{ _followSymlinks = False
, _ignoreSymlinkLoopErrors = False
, _ignoreNonExistingFiles = False
, _ignoreInAccessibleFiles = False
}

-- | Minimal read without any metadata.
{-# INLINE readScanWith_ #-}
readScanWith_ :: -- (MonadIO m, MonadCatch m) =>
Expand Down Expand Up @@ -305,13 +269,13 @@ statEntryType conf parent dname = do
else EntryIsNotDir
Left errno -> do
if errno == eNOENT
then unless (_ignoreNonExistingFiles conf) $
then unless (_ignoreENOENT conf) $
throwErrno (errMsg path)
else if errno == eACCES
then unless (_ignoreInAccessibleFiles conf) $
then unless (_ignoreEACCESS conf) $
throwErrno (errMsg path)
else if errno == eLOOP
then unless (_ignoreSymlinkLoopErrors conf) $
then unless (_ignoreELOOP conf) $
throwErrno (errMsg path)
else throwErrno (errMsg path)
pure $ EntryIgnored
Expand Down Expand Up @@ -414,10 +378,8 @@ streamEitherReader confMod = Unfold step return
Just x -> return $ Yield x s

{-# INLINE streamReader #-}
streamReader
:: MonadIO m
=> (ReadOptions -> ReadOptions) -> Unfold m (PosixPath, DirStream) Path
streamReader confMod = fmap (either id id) (streamEitherReader confMod)
streamReader :: MonadIO m => Unfold m (PosixPath, DirStream) Path
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the reasoning of where to keep readoptions?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

where it is needed.

streamReader = fmap (either id id) (streamEitherReader id)

{-# INLINE before #-}
before :: PosixPath -> IO (PosixPath, DirStream)
Expand All @@ -433,14 +395,13 @@ after (_, dirStream) = closeDirStream dirStream
-- /Internal/
--
{-# INLINE reader #-}
reader :: (MonadIO m, MonadCatch m)
=> (ReadOptions -> ReadOptions) -> Unfold m Path Path
reader confMod =
reader :: (MonadIO m, MonadCatch m) => Unfold m Path Path
reader =
-- XXX Instead of using bracketIO for each iteration of the loop we should
-- instead yield a buffer of dir entries in each iteration and then use an
-- unfold and concat to flatten those entries. That should improve the
-- performance.
UF.bracketIO before after (streamReader confMod)
UF.bracketIO before after (streamReader)

-- | Read directories as Left and files as Right. Filter out "." and ".."
-- entries.
Expand Down
34 changes: 14 additions & 20 deletions core/src/Streamly/Internal/FileSystem/Windows/ReadDir.hsc
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,7 @@
module Streamly.Internal.FileSystem.Windows.ReadDir
(
#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
ReadOptions
, defaultReadOptions

, DirStream
DirStream
, openDirStream
, closeDirStream
, readDirStreamEither
Expand Down Expand Up @@ -43,6 +40,7 @@ import qualified Streamly.Internal.Data.Unfold as UF (bracketIO)
import qualified Streamly.Internal.FileSystem.WindowsPath as Path
import qualified System.Win32 as Win32 (failWith)

import Streamly.Internal.FileSystem.DirOptions
import Foreign hiding (void)

#include <windows.h>
Expand Down Expand Up @@ -137,15 +135,6 @@ failIf p wh act = do
iNVALID_HANDLE_VALUE :: HANDLE
iNVALID_HANDLE_VALUE = castUINTPtrToPtr maxBound

------------------------------------------------------------------------------
-- Config
------------------------------------------------------------------------------

data ReadOptions = ReadOptions

defaultReadOptions :: ReadOptions
defaultReadOptions = ReadOptions

------------------------------------------------------------------------------
-- Dir stream implementation
------------------------------------------------------------------------------
Expand Down Expand Up @@ -206,6 +195,13 @@ readDirStreamEither _ (DirStream (h, ref, fdata)) =

where

-- XXX: for a symlink the attribute may have a FILE_ATTRIBUTE_DIRECTORY if
-- the symlink was created as a directory symlink, but it might have
-- changed later. To find the real type of the symlink when we have
-- followSymlinks option on we need to check if it is a
-- FILE_ATTRIBUTE_REPARSE_POINT, we need to open the reparse point and find
-- the type.

processEntry ptr = do
let dname = #{ptr WIN32_FIND_DATAW, cFileName} ptr
dattrs :: #{type DWORD} <-
Expand Down Expand Up @@ -244,24 +240,22 @@ streamEitherReader f = Unfold step return
Just x -> return $ Yield x strm

{-# INLINE streamReader #-}
streamReader :: MonadIO m =>
(ReadOptions -> ReadOptions) -> Unfold m DirStream Path
streamReader f = fmap (either id id) (streamEitherReader f)
streamReader :: MonadIO m => Unfold m DirStream Path
streamReader = fmap (either id id) (streamEitherReader id)

-- | Read a directory emitting a stream with names of the children. Filter out
-- "." and ".." entries.
--
-- /Internal/

{-# INLINE reader #-}
reader :: (MonadIO m, MonadCatch m) =>
(ReadOptions -> ReadOptions) -> Unfold m Path Path
reader f =
reader :: (MonadIO m, MonadCatch m) => Unfold m Path Path
reader =
-- XXX Instead of using bracketIO for each iteration of the loop we should
-- instead yield a buffer of dir entries in each iteration and then use an
-- unfold and concat to flatten those entries. That should improve the
-- performance.
UF.bracketIO openDirStream closeDirStream (streamReader f)
UF.bracketIO openDirStream closeDirStream streamReader

-- | Read directories as Left and files as Right. Filter out "." and ".."
-- entries.
Expand Down
1 change: 1 addition & 0 deletions core/streamly-core.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,7 @@ library

, Streamly.Internal.Data.Time.Clock.Type
, Streamly.Internal.FileSystem.Path.Common
, Streamly.Internal.FileSystem.DirOptions

if flag(internal-dev)
exposed-modules:
Expand Down
Loading
Loading