Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 20 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,27 @@
# Streamly Coreutils
# Streamly Coreutils (Fast, Concurrent and Powerful)

This repository provides Haskell functions that reimplement common
GNU `coreutils` commands, utilizing the `streamly` library for
efficient, streaming data processing where applicable. The goal is to
offer a functional and highly performant alternative to traditional
This repository provides Haskell functions that reimplement common GNU
`coreutils` commands, utilizing the `streamly` library for efficient,
and concurrent streaming data processing where applicable. The goal is
to offer a highly composable and performant alternative to traditional
shell commands within Haskell applications, enabling complex data
transformations, system programming and scripting using a pure functional
paradigm. Where applicable, the implementation is designed to be
highly concurrent, for example, the `ls` equivalent can list directory
transformations, system programming and scripting using a pure
functional paradigm. Where applicable, the implementation is designed
to be concurrent, for example, the `find` equivalent can list directory
contents concurrently for improved performance.

# Fast, Concurrent and Powerful

How is it fast? For example, the serial implementation of `find` is
faster than `fd`, the fastest known `find` implementation, written in Rust. How
is it concurrent? Concurrency comes for free using the Haskell streamly
library, so wherever possible the implementation is concurrent and if
you need concurrency somewhere it can be made concurrent trivially. How
is it powerful? For example, the find implementation offers many traversal
strategies, such as bfs, dfs, interleaved, concurrent unordered, concurrent ordered,
concurrent interleaved, all these are trivial to implement thanks to
Haskell streamly.

## Implemented Commands

Currently, this library provides implementations for the
Expand Down
136 changes: 136 additions & 0 deletions app/hfd.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
module Main (main) where

import Data.Maybe (fromMaybe)
import System.IO (stdout)
import Text.Read (readMaybe)

import Options.Applicative
    ( Parser
    , ParserInfo
    , briefDesc
    , execParser
    , fullDesc
    , header
    , help
    , helper
    , info
    , long
    , metavar
    , optional
    , option
    , progDesc
    , strArgument
    , (<**>)
    )
import qualified Options.Applicative as OA
import qualified Streamly.Data.Stream.Prelude as Stream
import qualified Streamly.FileSystem.Handle as Handle
import qualified Streamly.FileSystem.Path as Path

import Streamly.Coreutils.Find
    ( FindOptions
    , findByteChunked
    , maxResults
    , parallelInterleaved
    , parallelOrdered
    , parallelUnordered
    , serialAppend
    , serialBfs
    , serialBfsRev
    , serialDfs
    , serialInterleaved
    )

-- | Fully resolved command-line configuration for the @hfd@ driver.
data Config = Config
    { cfgTraversal :: FindOptions -> FindOptions
      -- ^ Traversal-mode modifier selected by the user (see 'toTraversalConfig').
    , cfgRoot :: FilePath
      -- ^ Root path to search; defaults to @"."@ when no PATH argument is given.
    , cfgMaxResults :: Maybe Int
      -- ^ When 'Just' n, stop after emitting n results (applied via 'maxResults').
    }

-- | Directory traversal strategies selectable from the command line.
-- Each constructor maps to one of the Streamly.Coreutils.Find traversal
-- modifiers in 'toTraversalConfig'.
data Traversal
    = TraversalDfs                 -- ^ Depth-first (the default).
    | TraversalBfs                 -- ^ Breadth-first.
    | TraversalBfsRev              -- ^ Reverse breadth-first.
    | TraversalAppend              -- ^ Serial append.
    | TraversalInterleaved         -- ^ Serial interleaved.
    | TraversalParallel            -- ^ Parallel, unordered output.
    | TraversalParallelInterleaved -- ^ Parallel, interleaved output.
    | TraversalParallelOrdered     -- ^ Parallel, ordered output.

-- | Map a command-line 'Traversal' choice to the corresponding
-- Streamly.Coreutils.Find option modifier.
toTraversalConfig :: Traversal -> FindOptions -> FindOptions
toTraversalConfig TraversalDfs = serialDfs
toTraversalConfig TraversalBfs = serialBfs
toTraversalConfig TraversalBfsRev = serialBfsRev
toTraversalConfig TraversalAppend = serialAppend
toTraversalConfig TraversalInterleaved = serialInterleaved
toTraversalConfig TraversalParallel = parallelUnordered
toTraversalConfig TraversalParallelInterleaved = parallelInterleaved
toTraversalConfig TraversalParallelOrdered = parallelOrdered

-- | Assemble a 'Config' from the parsed command-line pieces.
--
-- An absent PATH argument defaults to the current directory (@"."@).
mkConfig :: Traversal -> Maybe Int -> Maybe FilePath -> Config
mkConfig traversal mMaxResults mPath =
    Config
        { cfgTraversal = toTraversalConfig traversal
          -- 'fromMaybe' replaces the non-idiomatic 'maybe "." id'.
        , cfgRoot = fromMaybe "." mPath
        , cfgMaxResults = mMaxResults
        }

-- | Parse the traversal-mode flags. The flags are mutually exclusive;
-- when none is given the traversal defaults to depth-first.
traversalParser :: Parser Traversal
traversalParser =
           mkFlag TraversalBfs "bfs" "Breadth-first traversal"
    OA.<|> mkFlag TraversalBfsRev "bfs-rev" "Reverse breadth-first traversal"
    OA.<|> mkFlag TraversalAppend "append" "Serial append traversal"
    OA.<|> mkFlag TraversalInterleaved "interleaved" "Serial interleaved traversal"
    OA.<|> mkFlag TraversalParallel "parallel" "Parallel unordered traversal"
    OA.<|> mkFlag TraversalParallelInterleaved
               "parallel-interleaved" "Parallel interleaved traversal"
    OA.<|> mkFlag TraversalParallelOrdered
               "parallel-ordered" "Parallel ordered traversal"
    OA.<|> mkFlag TraversalDfs "dfs" "Depth-first traversal"
    OA.<|> OA.pure TraversalDfs
  where
    -- One boolean flag per traversal mode; factored to avoid repetition.
    mkFlag val name desc = OA.flag' val (long name <> help desc)

-- | Parse the full command line: traversal flags, an optional result
-- cap, and an optional root PATH argument.
configParser :: Parser Config
configParser =
    mkConfig <$> traversalParser <*> maxResultsOpt <*> rootArg
  where
    -- --max-results N, validated to be a positive integer.
    maxResultsOpt =
        optional $ option (OA.eitherReader parsePositiveInt) $
            long "max-results"
                <> metavar "N"
                <> help "Stop after emitting N results"
    -- Optional positional PATH; mkConfig defaults it when absent.
    rootArg =
        optional $ strArgument $ metavar "PATH" <> help "Root path to search"

-- | Top-level parser with @--help@ support.
--
-- NOTE(review): the original combined 'fullDesc' and 'briefDesc'. Both
-- modifiers set the same help-verbosity field and the later one wins,
-- so 'fullDesc' was dead code. Keeping only 'fullDesc' so the help
-- output shows the complete description.
parserInfo :: ParserInfo Config
parserInfo =
    info
        (configParser <**> helper)
        (fullDesc
            <> progDesc "A basic fd-like driver for Streamly.Coreutils.Find."
            <> header "hfd")

-- | Entry point: parse the command line, build the find options, and
-- stream the results to stdout as byte chunks.
main :: IO ()
main = do
    config <- execParser parserInfo
    rootPath <- Path.fromString (cfgRoot config)
    let withOptions baseOpts =
            let traversed = cfgTraversal config baseOpts
            -- Apply the result cap only when the user asked for one.
            in maybe id maxResults (cfgMaxResults config) traversed
    Stream.fold
        (Handle.writeChunks stdout)
        (findByteChunked withOptions rootPath)

-- | Parse a strictly positive 'Int' from a command-line argument.
--
-- Returns 'Left' with a user-facing message when the input is not an
-- integer or is not positive. Uses 'readMaybe' rather than the
-- hand-rolled 'reads' pattern; unlike the 'reads' version this also
-- tolerates surrounding whitespace, which is the standard 'readMaybe'
-- behavior.
parsePositiveInt :: String -> Either String Int
parsePositiveInt str =
    case readMaybe str of
        Just n
            | n > 0 -> Right n
            | otherwise -> Left "N must be positive"
        Nothing -> Left "N must be an integer"
Loading
Loading