From 80d06032123ca5edf143b6d263517e0a44158207 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Sat, 28 Dec 2024 20:42:07 +0530 Subject: [PATCH 1/7] Rename splitOn and indexOnSuffix --- core/src/Streamly/Internal/Data/Array.hs | 13 +++++---- .../Streamly/Internal/Data/MutArray/Type.hs | 15 ++++++---- .../src/Streamly/Internal/Data/Stream/Type.hs | 29 ++++++++++--------- 3 files changed, 34 insertions(+), 23 deletions(-) diff --git a/core/src/Streamly/Internal/Data/Array.hs b/core/src/Streamly/Internal/Data/Array.hs index da6c77fc06..d895b4be6c 100644 --- a/core/src/Streamly/Internal/Data/Array.hs +++ b/core/src/Streamly/Internal/Data/Array.hs @@ -57,7 +57,7 @@ module Streamly.Internal.Data.Array -- , getSlice , sliceIndexerFromLen , slicerFromLen - , splitOn -- XXX slicesEndBy + , sliceEndBy_ -- * Streaming Operations , streamTransform @@ -101,6 +101,7 @@ module Streamly.Internal.Data.Array , pinnedCompactLE , compactOnByte , compactOnByteSuffix + , splitOn ) where @@ -310,12 +311,14 @@ getSliceUnsafe index len (Array contents start e) = -- matching the predicate is dropped. -- -- /Pre-release/ -{-# INLINE splitOn #-} -splitOn :: (Monad m, Unbox a) => +{-# INLINE sliceEndBy_ #-} +sliceEndBy_, splitOn :: (Monad m, Unbox a) => (a -> Bool) -> Array a -> Stream m (Array a) -splitOn predicate arr = +sliceEndBy_ predicate arr = fmap (\(i, len) -> getSliceUnsafe i len arr) - $ D.indexOnSuffix predicate (read arr) + $ D.indexEndBy_ predicate (read arr) + +RENAME(splitOn,sliceEndBy_) {-# INLINE sliceIndexerFromLen #-} sliceIndexerFromLen :: forall m a. (Monad m, Unbox a) diff --git a/core/src/Streamly/Internal/Data/MutArray/Type.hs b/core/src/Streamly/Internal/Data/MutArray/Type.hs index 9d8a370feb..615ec399b4 100644 --- a/core/src/Streamly/Internal/Data/MutArray/Type.hs +++ b/core/src/Streamly/Internal/Data/MutArray/Type.hs @@ -257,7 +257,7 @@ module Streamly.Internal.Data.MutArray.Type -- | Split an array into slices. -- , getSlicesFromLenN - , splitOn -- slicesEndBy + , sliceEndBy_ -- , slicesOf -- *** Concat @@ -359,6 +359,7 @@ module Streamly.Internal.Data.MutArray.Type , pinnedFromList , pinnedClone , unsafePinnedCreateOf + , splitOn ) where @@ -2891,12 +2892,16 @@ spliceExp = spliceWith (\l1 l2 -> max (l1 * 2) (l1 + l2)) -- matching the predicate is dropped. -- -- /Pre-release/ -{-# INLINE splitOn #-} -splitOn :: (MonadIO m, Unbox a) => +{-# INLINE sliceEndBy_ #-} +sliceEndBy_, splitOn :: (MonadIO m, Unbox a) => (a -> Bool) -> MutArray a -> Stream m (MutArray a) -splitOn predicate arr = +sliceEndBy_ predicate arr = fmap (\(i, len) -> unsafeGetSlice i len arr) - $ D.indexOnSuffix predicate (read arr) + $ D.indexEndBy_ predicate (read arr) + +RENAME(splitOn,sliceEndBy_) + +-- XXX breakEndBy_? -- | Drops the separator byte {-# INLINE breakOn #-} diff --git a/core/src/Streamly/Internal/Data/Stream/Type.hs b/core/src/Streamly/Internal/Data/Stream/Type.hs index c63a722b7e..fefc3b4c02 100644 --- a/core/src/Streamly/Internal/Data/Stream/Type.hs +++ b/core/src/Streamly/Internal/Data/Stream/Type.hs @@ -143,7 +143,7 @@ module Streamly.Internal.Data.Stream.Type , foldIterateBfs -- * Splitting - , indexOnSuffix + , indexEndBy_ -- * Multi-stream folds -- | These should probably be expressed using zipping operations. @@ -153,6 +153,7 @@ module Streamly.Internal.Data.Stream.Type -- * Deprecated , sliceOnSuffix , unfoldMany + , indexOnSuffix ) where @@ -2114,25 +2115,27 @@ indexerBy (Fold step1 initial1 extract1 _final) n = extract (Tuple' i s) = (i,) <$> extract1 s --- XXX rename to indicesEndBy - --- | Like 'splitEndBy' but generates a stream of (index, len) tuples marking +-- | Like 'splitEndBy_' but generates a stream of (index, len) tuples marking -- the places where the predicate matches in the stream. -- +-- >>> Stream.toList $ Stream.indexEndBy_ (== '/') $ Stream.fromList "/home/harendra" +-- [(0,0),(1,4),(6,8)] +-- -- /Pre-release/ -{-# INLINE indexOnSuffix #-} -indexOnSuffix :: Monad m => +{-# INLINE indexEndBy_ #-} +indexEndBy_, indexOnSuffix :: Monad m => (a -> Bool) -> Stream m a -> Stream m (Int, Int) -indexOnSuffix predicate = - -- Scan the stream with the given refold +indexEndBy_ predicate = refoldIterateM (indexerBy (FL.takeEndBy_ predicate FL.length) 1) (return (-1, 0)) +RENAME(indexOnSuffix,indexEndBy_) + -- Alternate implementation -{-# INLINE_NORMAL _indexOnSuffix #-} -_indexOnSuffix :: Monad m => (a -> Bool) -> Stream m a -> Stream m (Int, Int) -_indexOnSuffix p (Stream step1 state1) = Stream step (Just (state1, 0, 0)) +{-# INLINE_NORMAL _indexEndBy_ #-} +_indexEndBy_ :: Monad m => (a -> Bool) -> Stream m a -> Stream m (Int, Int) +_indexEndBy_ p (Stream step1 state1) = Stream step (Just (state1, 0, 0)) where @@ -2149,9 +2152,9 @@ _indexOnSuffix p (Stream step1 state1) = Stream step (Just (state1, 0, 0)) Stop -> if len == 0 then Stop else Yield (i, len) Nothing step _ Nothing = return Stop -{-# DEPRECATED sliceOnSuffix "Please use indexOnSuffix instead." #-} +{-# DEPRECATED sliceOnSuffix "Please use indexEndBy_ instead." #-} sliceOnSuffix :: Monad m => (a -> Bool) -> Stream m a -> Stream m (Int, Int) -sliceOnSuffix = indexOnSuffix +sliceOnSuffix = indexEndBy_ ------------------------------------------------------------------------------ -- Stream with a cross product style monad instance From 54064407524c30618a8672bd56c23d079366e121 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Sun, 29 Dec 2024 11:48:48 +0530 Subject: [PATCH 2/7] Add isAbsolute, maybeFile and refactor --- core/src/Streamly/Internal/FileSystem/Path.hs | 41 +++- .../Internal/FileSystem/Path/Common.hs | 210 +++++++++++++----- .../Internal/FileSystem/PosixPath/FileDir.hs | 23 +- 3 files changed, 186 insertions(+), 88 deletions(-) diff --git a/core/src/Streamly/Internal/FileSystem/Path.hs b/core/src/Streamly/Internal/FileSystem/Path.hs index 80238ed8e2..bd2ee4f275 100644 --- a/core/src/Streamly/Internal/FileSystem/Path.hs +++ b/core/src/Streamly/Internal/FileSystem/Path.hs @@ -17,17 +17,36 @@ -- 'Path' type. -- -- The basic type-safety is provided by the --- "Streamly.Internal.FileSystem.PosixPath.LocSeg" module. We make a distinction --- between two types of paths viz. locations and segments. Locations are --- represented by the @Loc Path@ type and path segments are represented by the --- @Seg Path@ type. Locations are paths pointing to specific objects in the --- file system absolute or relative e.g. @\/usr\/bin@, @.\/local\/bin@, or @.@. --- Segments are a sequence of path components without any reference to a --- location e.g. @usr\/bin@, @local\/bin@, or @../bin@ are segments. This --- distinction is for safe append operation on paths, you can only append --- segments to any path and not a location. If you use the 'Path' type then --- append can fail if you try to append a location to a path, but if you use --- @Loc Path@ or @Seg Path@ types then append can never fail. +-- "Streamly.Internal.FileSystem.PosixPath.LocSeg" module. We make a +-- distinction between two types of paths viz. locations and segments. +-- Locations are represented by the @Loc Path@ type and path segments are +-- represented by the @Seg Path@ type. +-- +-- Locations are rooted paths, they have a "root" attached to them. Rooted +-- paths can be absolute or relative. Absolute paths have an absolute root e.g. +-- @\/usr\/bin@. Relative paths have a dynamic or relative root e.g. +-- @.\/local\/bin@, or @.@, in these cases the root is current directory which +-- is not absolute but can change dynamically, nevertheless these are rooted +-- paths as they still refer to a specific location starting from some root in +-- the file system even though the root is decided dynamically. +-- +-- In contrast to rooted paths, path segments are simply a sequence of path +-- components without any reference to a root or specific starting location +-- e.g. @usr\/bin@, @local\/bin@, or @../bin@ are simply path segments which +-- can be attached to any other path or segment to augment it. This distinction +-- is made to allow for safe append operation on paths, you can only append +-- path segments to any path, a rooted path cannot be appended to another path. +-- If you use the 'Path' type then append can fail if you try to append a +-- rooted location to another path, but if you use @Loc Path@ or @Seg Path@ +-- types then append can never fail at run time as the types would not allow +-- it. +-- +-- To summarize the conceptual distinctions: +-- * Path +-- * Rooted location +-- * Absolute +-- * Relative +-- * Unrooted segment -- -- Independently of the location or segment distinction you can also make the -- distinction between files and directories using the diff --git a/core/src/Streamly/Internal/FileSystem/Path/Common.hs b/core/src/Streamly/Internal/FileSystem/Path/Common.hs index 1c0767ecb4..96cb9fb1ba 100644 --- a/core/src/Streamly/Internal/FileSystem/Path/Common.hs +++ b/core/src/Streamly/Internal/FileSystem/Path/Common.hs @@ -30,6 +30,9 @@ module Streamly.Internal.FileSystem.Path.Common , dropTrailingSeparators , isSegment , isLocation + , maybeFile + , isAbsolute + , isRelativeWithDrive , append , unsafeAppend @@ -43,6 +46,7 @@ where #include "assert.hs" +import Control.Monad (when) import Control.Monad.Catch (MonadThrow(..)) import Data.Char (ord, isAlpha) import Data.Functor.Identity (Identity(..)) @@ -54,7 +58,7 @@ import GHC.Base (unsafeChr) import Language.Haskell.TH (Q, Exp) import Language.Haskell.TH.Quote (QuasiQuoter (..)) import Streamly.Internal.Data.Array (Array(..)) -import Streamly.Internal.Data.MutByteArray (Unbox) +import Streamly.Internal.Data.MutByteArray (Unbox(..)) import Streamly.Internal.Data.Path (PathException(..)) import Streamly.Internal.Data.Stream (Stream) import System.IO.Unsafe (unsafePerformIO) @@ -175,15 +179,9 @@ mkQ f = ++ ", can be used only as an expression" ------------------------------------------------------------------------------ --- Operations +-- Parsing Operations ------------------------------------------------------------------------------ -posixSeparator :: Char -posixSeparator = '/' - -windowsSeparator :: Char -windowsSeparator = '\\' - -- XXX We can use Enum type class to include the Char type as well so that the -- functions can work on Array Word8/Word16/Char but that may be slow. @@ -202,6 +200,16 @@ wordToChar = unsafeChr . fromIntegral unsafeIndexChar :: (Unbox a, Integral a) => Int -> Array a -> Char unsafeIndexChar i a = wordToChar (Array.getIndexUnsafe i a) +------------------------------------------------------------------------------ +-- Separator parsing +------------------------------------------------------------------------------ + +posixSeparator :: Char +posixSeparator = '/' + +windowsSeparator :: Char +windowsSeparator = '\\' + -- | Primary path separator character, @/@ on Posix and @\\@ on Windows. -- Windows supports @/@ too as a separator. Please use 'isSeparator' for -- testing if a char is a separator char. @@ -210,10 +218,6 @@ primarySeparator :: OS -> Char primarySeparator Posix = posixSeparator primarySeparator Windows = windowsSeparator ------------------------------------------------------------------------------- --- Path parsing utilities ------------------------------------------------------------------------------- - -- | On Posix only @/@ is a path separator but in windows it could be either -- @/@ or @\\@. {-# INLINE isSeparator #-} @@ -225,6 +229,10 @@ isSeparator Posix c = c == posixSeparator isSeparatorWord :: Integral a => OS -> a -> Bool isSeparatorWord os = isSeparator os . wordToChar +------------------------------------------------------------------------------ +-- Path normalization +------------------------------------------------------------------------------ + countTrailingBy :: Unbox a => (a -> Bool) -> Array a -> Int countTrailingBy p arr = runIdentity @@ -244,23 +252,26 @@ dropTrailingBy p arr@(Array barr start end) = -- | If the path is @//@ the result is @/@. If it is @a//@ then the result is -- @a@. +-- +-- Note that a path with trailing separators may implicitly be considered as a +-- directory by some applications. So dropping it may change the dir nature of +-- the path. {-# INLINE dropTrailingSeparators #-} dropTrailingSeparators :: (Unbox a, Integral a) => OS -> Array a -> Array a dropTrailingSeparators os = dropTrailingBy (isSeparator os . wordToChar) --- | path is @.@ or starts with @./@. -isCurDirRelativeLocation :: (Unbox a, Integral a) => Array a -> Bool -isCurDirRelativeLocation a - -- Assuming the path is not empty. - | wordToChar (Array.getIndexUnsafe 0 a) /= '.' = False - | Array.byteLength a < 2 = True - | otherwise = isSeparator Windows (wordToChar (Array.getIndexUnsafe 1 a)) +-- XXX We implicitly consider "./" as a rooted path. We can provide an API to +-- drop all leading "." to make it a segment from a rooted path. --- | @C:...@ -hasDrive :: (Unbox a, Integral a) => Array a -> Bool -hasDrive a - | Array.byteLength a < 2 = False +------------------------------------------------------------------------------ +-- Drive parsing +------------------------------------------------------------------------------ + +-- | @C:...@, does not check array length. +{-# INLINE unsafeHasDrive #-} +unsafeHasDrive :: (Unbox a, Integral a) => Array a -> Bool +unsafeHasDrive a -- Check colon first for quicker return | unsafeIndexChar 1 a /= ':' = False -- XXX If we found a colon anyway this cannot be a valid path unless it has @@ -269,63 +280,148 @@ hasDrive a | not (isAlpha (unsafeIndexChar 0 a)) = False | otherwise = True --- | On windows, the path starts with a separator. -isCurDriveRelativeLocation :: (Unbox a, Integral a) => Array a -> Bool -isCurDriveRelativeLocation a = +-- | A path that starts with a alphabet followed by a colon e.g. @C:...@. +hasDrive :: (Unbox a, Integral a) => Array a -> Bool +hasDrive a = Array.byteLength a >= 2 && unsafeHasDrive a + +-- | A path that contains only an alphabet followed by a colon e.g. @C:@. +isDrive :: (Unbox a, Integral a) => Array a -> Bool +isDrive a = Array.byteLength a == 2 && unsafeHasDrive a + +------------------------------------------------------------------------------ +-- Relative or Absolute +------------------------------------------------------------------------------ + +-- | A path relative to cur dir it is either @.@ or starts with @./@. +isRelativeCurDir :: (Unbox a, Integral a) => Array a -> Bool +isRelativeCurDir a -- Assuming the path is not empty. - isSeparator Windows (wordToChar (Array.getIndexUnsafe 0 a)) + | wordToChar (Array.getIndexUnsafe 0 a) /= '.' = False + | Array.byteLength a < 2 = True + | otherwise = isSeparator Windows (wordToChar (Array.getIndexUnsafe 1 a)) --- | @C:\...@ -isLocationDrive :: (Unbox a, Integral a) => Array a -> Bool -isLocationDrive a - | Array.byteLength a < 3 = False - -- Check colon first for quicker return - | unsafeIndexChar 1 a /= ':' = False - | not (isSeparator Windows (unsafeIndexChar 2 a)) = False - -- XXX If we found a colon anyway this cannot be a valid path unless it has - -- a drive prefix. colon is not a valid path character. - -- XXX check isAlpha perf - | not (isAlpha (unsafeIndexChar 0 a)) = False - | otherwise = True +-- | The path starting with a separator. On Windows this is relative to current +-- drive while on Posix this is absolute path as there is only one drive. +isRelativeCurDrive :: (Unbox a, Integral a) => OS -> Array a -> Bool +isRelativeCurDrive os a = + -- Assuming the path is not empty. + isSeparator os (wordToChar (Array.getIndexUnsafe 0 a)) + +-- | @C:@ or @C:a...@. +isRelativeWithDrive :: (Unbox a, Integral a) => Array a -> Bool +isRelativeWithDrive a = + hasDrive a + && ( Array.byteLength a < 3 + || not (isSeparator Windows (unsafeIndexChar 2 a)) + ) + +-- | @C:\...@. Note that "C:" or "C:a" is not absolute. +isAbsoluteWithDrive :: (Unbox a, Integral a) => Array a -> Bool +isAbsoluteWithDrive a = + Array.byteLength a >= 3 + && unsafeHasDrive a + && isSeparator Windows (unsafeIndexChar 2 a) -- | @\\\\...@ -isAbsoluteUNCLocation :: (Unbox a, Integral a) => Array a -> Bool -isAbsoluteUNCLocation a +isAbsoluteUNC :: (Unbox a, Integral a) => Array a -> Bool +isAbsoluteUNC a | Array.byteLength a < 2 = False | unsafeIndexChar 0 a /= '\\' = False | unsafeIndexChar 1 a /= '\\' = False | otherwise = True --- | On Posix and Windows, --- * a path starting with a separator, an absolute location --- * current dir "." or a location relative to current dir "./" +-- | Note that on Windows a path starting with a separator is relative to +-- current drive while on Posix this is absolute path as there is only one +-- drive. +isAbsolute :: (Unbox a, Integral a) => OS -> Array a -> Bool +isAbsolute Posix arr = + isRelativeCurDrive Posix arr +isAbsolute Windows arr = + isAbsoluteWithDrive arr || isAbsoluteUNC arr + +------------------------------------------------------------------------------ +-- Location or Segment +------------------------------------------------------------------------------ + +-- XXX API for static processing of .. (normalizeParentRefs) +-- +-- Note: paths starting with . or .. are ambiguous and can be considered +-- segments or rooted. We consider a path starting with "." as rooted, when +-- someone uses "./x" they explicitly mean x in the current directory whereas +-- just "x" can be taken to mean a path segment without any specific root. +-- However, in typed paths the programmer can convey the meaning whether they +-- mean it as a segment or a rooted path. So even "./x" can potentially be used +-- as a segment which can just mean "x". -- --- On Windows: --- * @C:\\@ local absolute --- * @C:@ local relative --- * @\\@ local relative to current drive root --- * @\\\\@ UNC network location +-- XXX For the untyped Path we can allow appending "./x" to other paths. We can +-- leave this to the programmer. In typed paths we can allow "./x" in segments. +-- +-- XXX C:\\ is invalid, \\share\ is invalid? +-- XXX Empty path can be taken to mean "." except in case of UNC paths + +-- | Rooted paths on Posix and Windows, +-- * @/...@ a path starting with a separator +-- * @.@ current dir +-- * @./...@ a location relative to current dir +-- +-- Rooted paths on Windows: +-- * @C:@ local drive cur dir location +-- * @C:a\\b@ local drive relative to cur dir location +-- * @C:\\@ local drive absolute location +-- * @\\@ local path relative to current drive +-- * @\\\\share\\@ UNC network location -- * @\\\\?\\C:\\@ Long UNC local path -- * @\\\\?\\UNC\\@ Long UNC server location -- * @\\\\.\\@ DOS local device namespace -- * @\\\\??\\@ DOS global namespace -- --- * @C:file@ a path relative to curdir. isLocation :: (Unbox a, Integral a) => OS -> Array a -> Bool isLocation Posix a = - -- Assuming path is not empty. - isSeparator Posix (wordToChar (Array.getIndexUnsafe 0 a)) - || isCurDirRelativeLocation a + isRelativeCurDrive Posix a + || isRelativeCurDir a isLocation Windows a = - isLocationDrive a - || isCurDriveRelativeLocation a + isRelativeCurDrive Windows a + || isRelativeCurDir a || hasDrive a -- curdir-in-drive relative, drive absolute - || isAbsoluteUNCLocation a - || isCurDirRelativeLocation a + || isAbsoluteUNC a isSegment :: (Unbox a, Integral a) => OS -> Array a -> Bool isSegment os = not . isLocation os +------------------------------------------------------------------------------ +-- File or Dir +------------------------------------------------------------------------------ + +-- | Returns () if the path can be a valid file, otherwise throws an +-- exception. +maybeFile :: (MonadThrow m, Unbox a, Integral a) => OS -> Array a -> m () +maybeFile os arr = do + s1 <- + Stream.toList + $ Stream.take 3 + $ Stream.takeWhile (not . isSeparator os) + $ fmap wordToChar + $ Array.readRev arr + -- XXX On posix we just need to check last 3 bytes of the array + -- XXX Display the path in the exception messages. + case s1 of + [] -> throwM $ InvalidPath "A file name cannot have a trailing separator" + '.' : xs -> + case xs of + [] -> throwM $ InvalidPath "A file name cannot have a trailing \".\"" + '.' : [] -> + throwM $ InvalidPath "A file name cannot have a trailing \"..\"" + _ -> pure () + _ -> pure () + + case os of + Windows -> + -- XXX We can exclude a UNC root as well but just the UNC root is + -- not even a valid path. + when (isDrive arr) + $ throwM $ InvalidPath "A drive root is not a valid file name" + Posix -> pure () + ------------------------------------------------------------------------------ -- Operations of Path ------------------------------------------------------------------------------ diff --git a/core/src/Streamly/Internal/FileSystem/PosixPath/FileDir.hs b/core/src/Streamly/Internal/FileSystem/PosixPath/FileDir.hs index 3833034df0..eda0864e84 100644 --- a/core/src/Streamly/Internal/FileSystem/PosixPath/FileDir.hs +++ b/core/src/Streamly/Internal/FileSystem/PosixPath/FileDir.hs @@ -46,16 +46,13 @@ module Streamly.Internal.FileSystem.OS_PATH.FileDir ) where -import Control.Monad.Catch (MonadThrow(..)) import Language.Haskell.TH (Q, Exp) import Language.Haskell.TH.Syntax (lift) import Language.Haskell.TH.Quote (QuasiQuoter) -import Streamly.Internal.Data.Path (IsPath(..), PathException(..)) +import Streamly.Internal.Data.Path (IsPath(..)) import Streamly.Internal.FileSystem.Path.Common (OS(..), mkQ) import Streamly.Internal.FileSystem.OS_PATH (OS_PATH(..)) -import qualified Streamly.Internal.Data.Array as Array -import qualified Streamly.Internal.Data.Stream as Stream import qualified Streamly.Internal.FileSystem.Path.Common as Common import qualified Streamly.Internal.FileSystem.OS_PATH as OsPath @@ -84,23 +81,9 @@ instance IsFileDir (Dir a) instance IsPath OS_PATH (File OS_PATH) where unsafeFromPath = File - -- Cannot have "." or ".." as last component. fromPath p@(OS_PATH arr) = do - s1 <- - Stream.toList - $ Stream.take 3 - $ Stream.takeWhile (not . Common.isSeparator OS_NAME) - $ fmap Common.wordToChar - $ Array.readRev arr - -- XXX On posix we just need to check last 3 bytes of the array - case s1 of - '.' : xs -> - case xs of - [] -> throwM $ InvalidPath "A file name cannot be \".\"" - '.' : [] -> - throwM $ InvalidPath "A file name cannot be \"..\"" - _ -> pure $ File p - _ -> pure $ File p + !_ <- Common.maybeFile OS_NAME arr + pure $ File p toPath (File p) = p From ed2cc9f868d237520603779a2a0d9e5d9948cc7b Mon Sep 17 00:00:00 2001 From: Adithya Kumar Date: Thu, 5 Dec 2024 20:39:07 +0530 Subject: [PATCH 3/7] Add splitPath operation --- .../Internal/FileSystem/Path/Common.hs | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/core/src/Streamly/Internal/FileSystem/Path/Common.hs b/core/src/Streamly/Internal/FileSystem/Path/Common.hs index 96cb9fb1ba..9ee4a5ea1d 100644 --- a/core/src/Streamly/Internal/FileSystem/Path/Common.hs +++ b/core/src/Streamly/Internal/FileSystem/Path/Common.hs @@ -36,6 +36,7 @@ module Streamly.Internal.FileSystem.Path.Common , append , unsafeAppend + , splitPath -- * Utilities , wordToChar @@ -46,9 +47,29 @@ where #include "assert.hs" +{- $setup +>>> :m + +>>> import Data.Functor.Identity (runIdentity) +>>> import System.IO.Unsafe (unsafePerformIO) +>>> import qualified Streamly.Data.Stream as Stream +>>> import qualified Streamly.Unicode.Stream as Unicode +>>> import qualified Streamly.Internal.Data.Array as Array +>>> import qualified Streamly.Internal.FileSystem.Path.Common as Common +>>> import qualified Streamly.Internal.Unicode.Stream as Unicode + +>>> packPosix = unsafePerformIO . Stream.fold Array.create . Unicode.encodeUtf8' . Stream.fromList +>>> unpackPosix = runIdentity . Stream.toList . Unicode.decodeUtf8' . Array.read + +>>> packWindows = unsafePerformIO . Stream.fold Array.create . Unicode.encodeUtf16le' . Stream.fromList +>>> unpackWindows = runIdentity . Stream.toList . Unicode.decodeUtf16le' . Array.read +-} + import Control.Monad (when) import Control.Monad.Catch (MonadThrow(..)) +import Control.Monad.IO.Class (MonadIO(..)) import Data.Char (ord, isAlpha) +import Data.Function ((&)) import Data.Functor.Identity (Identity(..)) #ifdef DEBUG import Data.Maybe (fromJust) @@ -463,3 +484,30 @@ append :: (Unbox a, Integral a) => OS -> (Array a -> String) -> Array a -> Array a -> Array a append os toStr a b = withAppendCheck os toStr b (doAppend os a b) + +-- | +-- >>> :{ +-- splitPath Common.Posix = Stream.toList . fmap unpackPosix . Common.splitPath Common.Posix . packPosix +-- splitPath Common.Windows = Stream.toList . fmap unpackWindows . Common.splitPath Common.Windows . packWindows +-- :} +-- +-- >>> splitPath Common.Posix "home//user/./..////\\directory/." +-- ["home","user","..","\\directory"] +-- +-- >>> splitPath Common.Windows "home//user/./..////\\directory/." +-- ["home","user","..","directory"] +-- +{-# INLINE splitPath #-} +splitPath + :: forall a m. (Unbox a, Integral a, MonadIO m) + => OS -> Array a -> Stream m (Array a) +splitPath os arr = + Stream.indexEndBy_ (isSeparatorWord os) (Array.read arr) + & Stream.filter (not . shouldFilterOut) + & fmap (\(i, len) -> Array.getSliceUnsafe i len arr) + + where + + shouldFilterOut (off, len) = + len == 0 || + (len == 1 && unsafeIndexChar off arr == '.') From bf881de24c847a49909eadc2ce119721ac5783b7 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Sun, 29 Dec 2024 13:21:54 +0530 Subject: [PATCH 4/7] Add doc and some doctests --- .../Streamly/Internal/FileSystem/Path/Common.hs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/core/src/Streamly/Internal/FileSystem/Path/Common.hs b/core/src/Streamly/Internal/FileSystem/Path/Common.hs index 9ee4a5ea1d..a7c7a799b3 100644 --- a/core/src/Streamly/Internal/FileSystem/Path/Common.hs +++ b/core/src/Streamly/Internal/FileSystem/Path/Common.hs @@ -485,12 +485,24 @@ append :: (Unbox a, Integral a) => append os toStr a b = withAppendCheck os toStr b (doAppend os a b) --- | +-- | Split a path into components separated by the path separator. "." +-- components in the path are ignored except in the leading position. Multiple +-- consecutive separators are ignored. +-- -- >>> :{ -- splitPath Common.Posix = Stream.toList . fmap unpackPosix . Common.splitPath Common.Posix . packPosix -- splitPath Common.Windows = Stream.toList . fmap unpackWindows . Common.splitPath Common.Windows . packWindows -- :} -- +-- >>> splitPath Common.Posix "." +-- ["."] +-- +-- >>> splitPath Common.Posix "././" +-- ["."] +-- +-- >>> splitPath Common.Posix "./a/b/." +-- [".","a","b"] +-- -- >>> splitPath Common.Posix "home//user/./..////\\directory/." -- ["home","user","..","\\directory"] -- From 432ee6d9a9c5fd0bffe7dfc2f5d45a485df1ec45 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Sun, 29 Dec 2024 14:10:08 +0530 Subject: [PATCH 5/7] Add unsafeJoinPaths --- .../Internal/FileSystem/Path/Common.hs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/core/src/Streamly/Internal/FileSystem/Path/Common.hs b/core/src/Streamly/Internal/FileSystem/Path/Common.hs index a7c7a799b3..d3186d36a3 100644 --- a/core/src/Streamly/Internal/FileSystem/Path/Common.hs +++ b/core/src/Streamly/Internal/FileSystem/Path/Common.hs @@ -37,6 +37,7 @@ module Streamly.Internal.FileSystem.Path.Common , append , unsafeAppend , splitPath + , unsafeJoinPaths -- * Utilities , wordToChar @@ -523,3 +524,20 @@ splitPath os arr = shouldFilterOut (off, len) = len == 0 || (len == 1 && unsafeIndexChar off arr == '.') + +-- | Join paths by path separator. Does not check if the paths being appended +-- are rooted or path segments. Note that splitting and joining may not give +-- exactly the original path but an equivalent, normalized path. +{-# INLINE unsafeJoinPaths #-} +unsafeJoinPaths + :: forall a m. (Unbox a, Integral a, MonadIO m) + => OS -> Stream m (Array a) -> m (Array a) +unsafeJoinPaths os = + -- XXX This can be implemented more efficiently using an Array intersperse + -- operation. Which can be implemented by directly copying arrays rather + -- than converting them to stream first. Also fromStreamN would be more + -- efficient if we have to use streams. + -- XXX We can remove leading and trailing separators first, if any except + -- the leading separator from the first path. But it is not necessary. + -- Instead we can avoid adding a separator if it is already present. + Array.fromStream . Array.concatSepBy (charToWord $ primarySeparator os) From 8aef8ab438482ada4889a16fd621eb74c187811e Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Mon, 30 Dec 2024 15:56:14 +0530 Subject: [PATCH 6/7] Fix splitPath implementation, add splitRoot --- .../Internal/FileSystem/Path/Common.hs | 382 +++++++++++++++--- 1 file changed, 330 insertions(+), 52 deletions(-) diff --git a/core/src/Streamly/Internal/FileSystem/Path/Common.hs b/core/src/Streamly/Internal/FileSystem/Path/Common.hs index d3186d36a3..5e097bcb2b 100644 --- a/core/src/Streamly/Internal/FileSystem/Path/Common.hs +++ b/core/src/Streamly/Internal/FileSystem/Path/Common.hs @@ -36,13 +36,22 @@ module Streamly.Internal.FileSystem.Path.Common , append , unsafeAppend + , splitRoot + -- , dropRoot + -- , joinRoot , splitPath , unsafeJoinPaths + -- , processParentRefs -- * Utilities , wordToChar , charToWord , unsafeIndexChar + + -- * Internal + , unsafeSplitTopLevel + , unsafeSplitDrive + , unsafeSplitUNC ) where @@ -255,12 +264,18 @@ isSeparatorWord os = isSeparator os . wordToChar -- Path normalization ------------------------------------------------------------------------------ -countTrailingBy :: Unbox a => (a -> Bool) -> Array a -> Int -countTrailingBy p arr = +countWhile :: (a -> Bool) -> Stream Identity a -> Int +countWhile p = runIdentity - $ Stream.fold Fold.length - $ Stream.takeWhile p - $ Array.readRev arr + . Stream.fold Fold.length + . Stream.takeWhile p + +{-# INLINE countLeadingBy #-} +countLeadingBy :: Unbox a => (a -> Bool) -> Array a -> Int +countLeadingBy p = countWhile p . Array.read + +countTrailingBy :: Unbox a => (a -> Bool) -> Array a -> Int +countTrailingBy p = countWhile p . Array.readRev -- | If the path is @//@ the result is @/@. If it is @a//@ then the result is -- @a@. @@ -304,60 +319,83 @@ unsafeHasDrive a -- | A path that starts with a alphabet followed by a colon e.g. @C:...@. hasDrive :: (Unbox a, Integral a) => Array a -> Bool -hasDrive a = Array.byteLength a >= 2 && unsafeHasDrive a +hasDrive a = Array.length a >= 2 && unsafeHasDrive a -- | A path that contains only an alphabet followed by a colon e.g. @C:@. isDrive :: (Unbox a, Integral a) => Array a -> Bool -isDrive a = Array.byteLength a == 2 && unsafeHasDrive a +isDrive a = Array.length a == 2 && unsafeHasDrive a ------------------------------------------------------------------------------ -- Relative or Absolute ------------------------------------------------------------------------------ -- | A path relative to cur dir it is either @.@ or starts with @./@. -isRelativeCurDir :: (Unbox a, Integral a) => Array a -> Bool -isRelativeCurDir a - -- Assuming the path is not empty. +isRelativeCurDir :: (Unbox a, Integral a) => OS -> Array a -> Bool +isRelativeCurDir os a + | len == 0 = False -- empty path should not occur | wordToChar (Array.getIndexUnsafe 0 a) /= '.' = False - | Array.byteLength a < 2 = True - | otherwise = isSeparator Windows (wordToChar (Array.getIndexUnsafe 1 a)) + | len < 2 = True + | otherwise = isSeparatorWord os (Array.getIndexUnsafe 1 a) + + where + + len = Array.length a + +-- | A path starting with a separator. +hasLeadingSeparator :: (Unbox a, Integral a) => OS -> Array a -> Bool +hasLeadingSeparator os a + | Array.length a == 0 = False -- empty path should not occur + | isSeparatorWord os (Array.getIndexUnsafe 0 a) = True + | otherwise = False + +-- | A non-UNC path starting with a separator. +isRelativeCurDriveRoot :: (Unbox a, Integral a) => Array a -> Bool +isRelativeCurDriveRoot a + | len == 0 = False -- empty path should not occur + | len == 1 && sep0 = True + | sep0 && c0 /= c1 = True + | otherwise = False + + where --- | The path starting with a separator. On Windows this is relative to current --- drive while on Posix this is absolute path as there is only one drive. -isRelativeCurDrive :: (Unbox a, Integral a) => OS -> Array a -> Bool -isRelativeCurDrive os a = - -- Assuming the path is not empty. - isSeparator os (wordToChar (Array.getIndexUnsafe 0 a)) + len = Array.length a + c0 = Array.getIndexUnsafe 0 a + c1 = Array.getIndexUnsafe 1 a + sep0 = isSeparatorWord Windows c0 -- | @C:@ or @C:a...@. isRelativeWithDrive :: (Unbox a, Integral a) => Array a -> Bool isRelativeWithDrive a = hasDrive a - && ( Array.byteLength a < 3 + && ( Array.length a < 3 || not (isSeparator Windows (unsafeIndexChar 2 a)) ) -- | @C:\...@. Note that "C:" or "C:a" is not absolute. isAbsoluteWithDrive :: (Unbox a, Integral a) => Array a -> Bool isAbsoluteWithDrive a = - Array.byteLength a >= 3 + Array.length a >= 3 && unsafeHasDrive a && isSeparator Windows (unsafeIndexChar 2 a) --- | @\\\\...@ +-- | @\\\\...@ or @//...@ isAbsoluteUNC :: (Unbox a, Integral a) => Array a -> Bool isAbsoluteUNC a - | Array.byteLength a < 2 = False - | unsafeIndexChar 0 a /= '\\' = False - | unsafeIndexChar 1 a /= '\\' = False - | otherwise = True + | Array.length a < 2 = False + | isSeparatorWord Windows c0 && c0 == c1 = True + | otherwise = False + + where + + c0 = Array.getIndexUnsafe 0 a + c1 = Array.getIndexUnsafe 1 a -- | Note that on Windows a path starting with a separator is relative to -- current drive while on Posix this is absolute path as there is only one -- drive. isAbsolute :: (Unbox a, Integral a) => OS -> Array a -> Bool isAbsolute Posix arr = - isRelativeCurDrive Posix arr + hasLeadingSeparator Posix arr isAbsolute Windows arr = isAbsoluteWithDrive arr || isAbsoluteUNC arr @@ -377,11 +415,27 @@ isAbsolute Windows arr = -- -- XXX For the untyped Path we can allow appending "./x" to other paths. We can -- leave this to the programmer. In typed paths we can allow "./x" in segments. --- --- XXX C:\\ is invalid, \\share\ is invalid? -- XXX Empty path can be taken to mean "." except in case of UNC paths - --- | Rooted paths on Posix and Windows, +-- +-- XXX "//share/x" works in powershell. But mixed forward and backward slashes +-- do not work, it is treated as a path relative to current drive e.g. +-- "\\/share/x" is treated as "C:/share/x". +-- +-- Invalid paths: +-- "C:\\\\" +-- "C:\\\\x" +-- "\\\\" +-- "\\\\share" +-- "\\\\share\\" +-- "\\\\share\\\\" +-- "\\\\share\\\\x" +-- "\\\\?\\c:" +-- "\\\\?\\c:\\\\\\" + +-- | Any path that starts with a separator, @./@ or a drive prefix is a rooted +-- path. +-- +-- Rooted paths on Posix and Windows, -- * @/...@ a path starting with a separator -- * @.@ current dir -- * @./...@ a location relative to current dir @@ -399,13 +453,12 @@ isAbsolute Windows arr = -- isLocation :: (Unbox a, Integral a) => OS -> Array a -> Bool isLocation Posix a = - isRelativeCurDrive Posix a - || isRelativeCurDir a + hasLeadingSeparator Posix a + || isRelativeCurDir Posix a isLocation Windows a = - isRelativeCurDrive Windows a - || isRelativeCurDir a + hasLeadingSeparator Windows a + || isRelativeCurDir Windows a || hasDrive a -- curdir-in-drive relative, drive absolute - || isAbsoluteUNC a isSegment :: (Unbox a, Integral a) => OS -> Array a -> Bool isSegment os = not . isLocation os @@ -453,8 +506,8 @@ maybeFile os arr = do {-# INLINE doAppend #-} doAppend :: (Unbox a, Integral a) => OS -> Array a -> Array a -> Array a doAppend os a b = unsafePerformIO $ do - let lenA = Array.byteLength a - lenB = Array.byteLength b + let lenA = Array.length a + lenB = Array.length b assertM(lenA /= 0 && lenB /= 0) assertM(countTrailingBy (isSeparatorWord os) a == 0) let len = lenA + 1 + lenB @@ -486,51 +539,276 @@ append :: (Unbox a, Integral a) => append os toStr a b = withAppendCheck os toStr b (doAppend os a b) +------------------------------------------------------------------------------ +-- Splitting +------------------------------------------------------------------------------ + +unsafeSplitPrefix :: (Unbox a, Integral a) => + OS -> Int -> Array a -> (Array a, Array a) +unsafeSplitPrefix os prefixLen arr = (drive, path) + + where + + len = Array.length arr + -- XXX Array.readFrom may be useful here + afterDrive = Array.getSliceUnsafe prefixLen (len - prefixLen) arr + n = countLeadingBy (isSeparatorWord os) afterDrive + cnt = prefixLen + n + drive = Array.getSliceUnsafe 0 cnt arr + path = Array.getSliceUnsafe cnt (len - cnt) arr + +-- XXX We can produce a normalized result for the drive during split. + +-- | Split a path prefixed with a separator into (drive, path) tuple. +-- +-- >>> toListPosix (a,b) = (unpackPosix a, unpackPosix b) +-- >>> splitPosix = toListPosix . Common.unsafeSplitTopLevel Common.Posix . packPosix +-- +-- >>> toListWin (a,b) = (unpackWindows a, unpackWindows b) +-- >>> splitWin = toListWin . Common.unsafeSplitTopLevel Common.Windows . packWindows +-- +-- >>> splitPosix "/" +-- ("/","") +-- +-- >>> splitPosix "//" +-- ("//","") +-- +-- >>> splitPosix "/home" +-- ("/","home") +-- +-- >>> splitPosix "/home/user" +-- ("/","home/user") +-- +-- >>> splitWin "\\" +-- ("\\","") +-- +-- >>> splitWin "\\home" +-- ("\\","home") +unsafeSplitTopLevel :: (Unbox a, Integral a) => + OS -> Array a -> (Array a, Array a) +-- Note on Windows we should be here only when the path starts with exactly one +-- separator, otherwise it would be UNC path. But on posix multiple separators +-- are valid. +unsafeSplitTopLevel os = unsafeSplitPrefix os 1 + +-- In some cases there is no valid drive component e.g. "\\a\\b", though if we +-- consider relative roots then we could use "\\" as the root in this case. In +-- other cases there is no valid path component e.g. "C:" or "\\share\\" though +-- the latter is not a valid path and in the former case we can use "." as the +-- path component. +-- +-- XXX Note, on windows C:\\\\x is an invalid path. + +-- | Split a path prefixed with drive into (drive, path) tuple. +-- +-- >>> toList (a,b) = (unpackPosix a, unpackPosix b) +-- >>> split = toList . Common.unsafeSplitDrive . packPosix +-- +-- >>> split "C:" +-- ("C:","") +-- +-- >>> split "C:a" +-- ("C:","a") +-- +-- >>> split "C:\\" +-- ("C:\\","") +-- +-- >>> split "C:\\\\" -- this is invalid path +-- ("C:\\\\","") +-- +-- >>> split "C:\\\\a" -- this is invalid path +-- ("C:\\\\","a") +-- +-- >>> split "C:\\/a/b" -- is this valid path? +-- ("C:\\/","a/b") +unsafeSplitDrive :: (Unbox a, Integral a) => Array a -> (Array a, Array a) +unsafeSplitDrive = unsafeSplitPrefix Windows 2 + +-- | Skip separators and then parse the next path segment. +-- Return (segment offset, segment length). +parseSegment :: (Unbox a, Integral a) => Array a -> Int -> Int -> (Int, Int) +parseSegment arr len sepOff = (segOff, segCnt) + + where + + arr1 = Array.getSliceUnsafe sepOff (len - sepOff) arr + sepCnt = countLeadingBy (isSeparatorWord Windows) arr1 + segOff = sepOff + sepCnt + + arr2 = Array.getSliceUnsafe segOff (len - segOff) arr + segCnt = countLeadingBy (not . isSeparatorWord Windows) arr2 + +-- XXX We can split a path as "root, . , rest" or "root, /, rest". +-- XXX We can remove the redundant path separator after the root. With that +-- joining root vs other paths will become similar. But there are some special +-- cases e.g. "C:a" does not have a separator, can we make this "C:.\\a"? In +-- case of "/home" we have "/" as root and we cannot add another separator +-- between this and the rest of the path. + +-- | Split a path prefixed with "\\" into (drive, path) tuple. +-- +-- >>> toList (a,b) = (unpackPosix a, unpackPosix b) +-- >>> split = toList . Common.unsafeSplitUNC . packPosix +-- +-- >> split "" +-- ("","") +-- +-- >>> split "\\\\" +-- ("\\\\","") +-- +-- >>> split "\\\\server" +-- ("\\\\server","") +-- +-- >>> split "\\\\server\\" +-- ("\\\\server\\","") +-- +-- >>> split "\\\\server\\home" +-- ("\\\\server\\","home") +-- +-- >>> split "\\\\?\\c:" +-- ("\\\\?\\c:","") +-- +-- >>> split "\\\\?\\c:/" +-- ("\\\\?\\c:/","") +-- +-- >>> split "\\\\?\\c:\\home" +-- ("\\\\?\\c:\\","home") +-- +-- >>> split "\\\\?\\UNC/" +-- ("\\\\?\\UNC/","") +-- +-- >>> split "\\\\?\\UNC\\server" +-- ("\\\\?\\UNC\\server","") +-- +-- >>> split "\\\\?\\UNC/server\\home" +-- ("\\\\?\\UNC/server\\","home") +-- +unsafeSplitUNC :: (Unbox a, Integral a) => Array a -> (Array a, Array a) +unsafeSplitUNC arr = + if cnt1 == 1 && unsafeIndexChar 2 arr == '?' + then do + if uncLen == 3 + && unsafeIndexChar uncOff arr == 'U' + && unsafeIndexChar (uncOff + 1) arr == 'N' + && unsafeIndexChar (uncOff + 2) arr == 'C' + then unsafeSplitPrefix Windows (serverOff + serverLen) arr + else unsafeSplitPrefix Windows sepOff1 arr + else unsafeSplitPrefix Windows sepOff arr + + where + + len = Array.length arr + arr1 = Array.getSliceUnsafe 2 (len - 2) arr + cnt1 = countLeadingBy (not . isSeparatorWord Windows) arr1 + sepOff = 2 + cnt1 + + -- XXX there should be only one separator in a valid path? + -- XXX it should either be UNC or two letter drive in a valid path + (uncOff, uncLen) = parseSegment arr len sepOff + sepOff1 = uncOff + uncLen + (serverOff, serverLen) = parseSegment arr len sepOff1 + +-- XXX should we make the root Maybe? Both components will have to be Maybe to +-- avoid an empty path. + +-- | If a path is rooted then separate the root and the remaining path +-- otherwise root is returned as empty. +-- +-- >>> toList (a,b) = (unpackPosix a, unpackPosix b) +-- >>> splitPosix = toList . Common.splitRoot Common.Posix . packPosix +-- +-- >>> splitPosix "/" +-- ("/","") +-- +-- >>> splitPosix "." +-- (".","") +-- +-- >>> splitPosix "/home" +-- ("/","home") +-- +-- >>> splitPosix "//" +-- ("//","") +-- +-- >>> splitPosix "./home" +-- ("./","home") +-- +-- >>> splitPosix "home" +-- ("","home") +-- +{-# INLINE splitRoot #-} +splitRoot :: (Unbox a, Integral a) => OS -> Array a -> (Array a, Array a) +splitRoot Posix arr + | isLocation Posix arr + = unsafeSplitTopLevel Posix arr + | otherwise = (Array.empty, arr) +splitRoot Windows arr + | isRelativeCurDriveRoot arr || isRelativeCurDir Windows arr + = unsafeSplitTopLevel Windows arr + | hasDrive arr = unsafeSplitDrive arr + | isAbsoluteUNC arr = unsafeSplitUNC arr + | otherwise = (Array.empty, arr) + -- | Split a path into components separated by the path separator. "." -- components in the path are ignored except in the leading position. Multiple -- consecutive separators are ignored. -- -- >>> :{ --- splitPath Common.Posix = Stream.toList . fmap unpackPosix . Common.splitPath Common.Posix . packPosix --- splitPath Common.Windows = Stream.toList . fmap unpackWindows . Common.splitPath Common.Windows . packWindows +-- splitPosix = Stream.toList . fmap unpackPosix . Common.splitPath Common.Posix . packPosix +-- splitWin = Stream.toList . fmap unpackWindows . Common.splitPath Common.Windows . packWindows -- :} -- --- >>> splitPath Common.Posix "." +-- >>> splitPosix "." -- ["."] -- --- >>> splitPath Common.Posix "././" --- ["."] +-- >>> splitPosix "././" +-- ["./"] -- --- >>> splitPath Common.Posix "./a/b/." --- [".","a","b"] +-- >>> splitPosix "./a/b/." +-- ["./","a","b"] -- --- >>> splitPath Common.Posix "home//user/./..////\\directory/." +-- >>> splitPosix "/" +-- ["/"] +-- +-- >>> splitPosix "/home" +-- ["/","home"] +-- +-- >>> splitWin "/home" +-- ["/","home"] +-- +-- >>> splitPosix "home//user/./..////\\directory/." -- ["home","user","..","\\directory"] -- --- >>> splitPath Common.Windows "home//user/./..////\\directory/." +-- >>> splitWin "home//user/./..////\\directory/." -- ["home","user","..","directory"] -- {-# INLINE splitPath #-} splitPath - :: forall a m. (Unbox a, Integral a, MonadIO m) + :: (Unbox a, Integral a, MonadIO m) => OS -> Array a -> Stream m (Array a) splitPath os arr = - Stream.indexEndBy_ (isSeparatorWord os) (Array.read arr) - & Stream.filter (not . shouldFilterOut) - & fmap (\(i, len) -> Array.getSliceUnsafe i len arr) + let stream = + Stream.indexEndBy_ (isSeparatorWord os) (Array.read rest) + & Stream.filter (not . shouldFilterOut) + & fmap (\(i, len) -> Array.getSliceUnsafe i len rest) + + in if Array.length root == 0 + then stream + else Stream.cons root stream where + (root, rest) = splitRoot os arr + shouldFilterOut (off, len) = len == 0 || - (len == 1 && unsafeIndexChar off arr == '.') + (len == 1 && unsafeIndexChar off rest == '.') -- | Join paths by path separator. Does not check if the paths being appended -- are rooted or path segments. Note that splitting and joining may not give -- exactly the original path but an equivalent, normalized path. {-# INLINE unsafeJoinPaths #-} unsafeJoinPaths - :: forall a m. (Unbox a, Integral a, MonadIO m) + :: (Unbox a, Integral a, MonadIO m) => OS -> Stream m (Array a) -> m (Array a) unsafeJoinPaths os = -- XXX This can be implemented more efficiently using an Array intersperse From 37f95657135b47d9065b9fee724a9ab93fd781aa Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Mon, 30 Dec 2024 12:36:12 +0530 Subject: [PATCH 7/7] Add user documentation in the exposed Path module --- core/src/Streamly/FileSystem/Path.hs | 92 ++++++++++++++++++ core/src/Streamly/Internal/FileSystem/Path.hs | 93 ++----------------- 2 files changed, 100 insertions(+), 85 deletions(-) diff --git a/core/src/Streamly/FileSystem/Path.hs b/core/src/Streamly/FileSystem/Path.hs index c3c6e63098..f0b1e969c2 100644 --- a/core/src/Streamly/FileSystem/Path.hs +++ b/core/src/Streamly/FileSystem/Path.hs @@ -5,6 +5,98 @@ -- Maintainer : streamly@composewell.com -- Portability : GHC -- +-- Well typed, flexible, extensible and efficient file systems paths, +-- preserving the OS and filesystem encoding. +-- +-- /Flexible/: you can choose the level of type safety you want. 'Path' is the +-- basic path type which can represent a file, directory, absolute or relative +-- path with no restrictions. Depending on how much type safety you want, you +-- can choose appropriate type wrappers or a combination of those to wrap the +-- 'Path' type. +-- +-- = Rooted Paths vs Path Segments +-- +-- For the safety of the path append operation we make the distinction of +-- rooted paths vs path segments. A path that starts from some implicit or +-- explicit root in the file system is a rooted path, for example, @\/usr\/bin@ +-- is a rooted path starting from an explicit file system root directory @/@. +-- Similarly, @.\/bin@ is a path with an implicit root, this path is hanging +-- from the current directory. A path that is not rooted is called a path +-- segment e.g. @local\/bin@ is a segment. +-- +-- This distinction affords safety to the path append operation. We can always +-- append a segment to a rooted path or to another segment. However, it does +-- not make sense to append a rooted path to another rooted path. The default +-- append operation in the Path module checks for this and fails if the +-- operation is incorrect. However, the programmer can force it by using the +-- unsafe version of append operation. You can also drop the root explicitly +-- and use the safe append operation. +-- +-- The "Streamly.FileSystem.Path.LocSeg" module provides explicit typing of +-- rooted paths vs path segments. Rooted paths are represented by the @Loc +-- Path@ type and path segments are represented by the @Seg Path@ type. If you +-- use the 'Path' type then append can fail if you try to append a rooted +-- location to another path, but if you use @Loc Path@ or @Seg Path@ types then +-- append can never fail at run time as the types would not allow it at compile +-- time. +-- +-- = Absolute vs Relative Rooted Paths +-- +-- Rooted paths can be absolute or relative. Absolute paths have an absolute +-- root e.g. @\/usr\/bin@. Relative paths have a dynamic or relative root e.g. +-- @.\/local\/bin@, or @.@, in these cases the root is current directory which +-- is not absolute but can change dynamically. Note that there is no type level +-- distinction for absolute and relative paths. +-- +-- = File vs Directory Paths +-- +-- Independently of the rooted or segment distinction you can also make the +-- distinction between files and directories using the +-- "Streamly.FileSystem.Path.FileDir" module. @File Path@ type represents a +-- file whereas @Dir Path@ represents a directory. It provides safety against +-- appending a path to a file. Append operation does not allow appending to +-- 'File' types. +-- +-- By default a path with a trailing separator is implicitly considered a +-- directory path. However, the absence of a trailing separator does not convey +-- any information, it could either be a directory or a file. Thus the append +-- operation allows appending to even the paths that do not have a trailing +-- separator. However, when creating a typed path of 'File' type the conversion +-- fails unless we explicitly drop the trailing separator. +-- +-- = Flexible Typing +-- +-- You can use the 'Loc', 'Seg' or 'Dir', 'File' types independent of each +-- other by using only the required module. If you want both types of +-- distinctions then you can use them together as well using the +-- "Streamly.FileSystem.Path.Typed" module. For example, the @Loc (Dir Path)@ +-- represents a rooted path which is a directory. You can only append to a path +-- that has 'Dir' in it and you can only append a 'Seg' type. +-- +-- You can choose to use just the basic 'Path' type or any combination of safer +-- types. You can upgrade or downgrade the safety using the @adapt@ operation. +-- Whenever a less restrictive path type is converted to a more restrictive +-- path type, the conversion involves run-time checks and it may fail. However, +-- a more restrictive path type can be freely converted to a less restrictive +-- one. +-- +-- = Extensibility +-- +-- Extensible, you can define your own newtype wrappers similar to 'File' or +-- 'Dir' to provide custom restrictions if you want. +-- +-- = Compatibility +-- +-- Any path type can be converted to the 'FilePath' type using the 'toString' +-- operation. Operations to convert to and from 'OsPath' type at zero cost are +-- provided in the @streamly-filepath@ package. This is possible because the +-- types use the same underlying representation as the 'OsPath' type. +-- +-- = String Creation Quasiquoter +-- +-- You may find the 'str' quasiquoter from "Streamly.Unicode.String" to be +-- useful in creating paths. +-- module Streamly.FileSystem.Path ( diff --git a/core/src/Streamly/Internal/FileSystem/Path.hs b/core/src/Streamly/Internal/FileSystem/Path.hs index bd2ee4f275..d092aa0297 100644 --- a/core/src/Streamly/Internal/FileSystem/Path.hs +++ b/core/src/Streamly/Internal/FileSystem/Path.hs @@ -5,97 +5,20 @@ -- Maintainer : streamly@composewell.com -- Portability : GHC -- --- = User Notes --- --- Well typed, flexible, extensible and efficient file systems paths, --- preserving the OS and filesystem encoding. --- --- /Flexible/: you can choose the level of type safety you want. 'Path' is the --- basic path type which can represent a file, directory, absolute or relative --- path with no restrictions. Depending on how much type safety you want, you --- can choose appropriate type wrappers or a combination of those to wrap the --- 'Path' type. --- --- The basic type-safety is provided by the --- "Streamly.Internal.FileSystem.PosixPath.LocSeg" module. We make a --- distinction between two types of paths viz. locations and segments. --- Locations are represented by the @Loc Path@ type and path segments are --- represented by the @Seg Path@ type. --- --- Locations are rooted paths, they have a "root" attached to them. Rooted --- paths can be absolute or relative. Absolute paths have an absolute root e.g. --- @\/usr\/bin@. Relative paths have a dynamic or relative root e.g. --- @.\/local\/bin@, or @.@, in these cases the root is current directory which --- is not absolute but can change dynamically, nevertheless these are rooted --- paths as they still refer to a specific location starting from some root in --- the file system even though the root is decided dynamically. --- --- In contrast to rooted paths, path segments are simply a sequence of path --- components without any reference to a root or specific starting location --- e.g. @usr\/bin@, @local\/bin@, or @../bin@ are simply path segments which --- can be attached to any other path or segment to augment it. This distinction --- is made to allow for safe append operation on paths, you can only append --- path segments to any path, a rooted path cannot be appended to another path. --- If you use the 'Path' type then append can fail if you try to append a --- rooted location to another path, but if you use @Loc Path@ or @Seg Path@ --- types then append can never fail at run time as the types would not allow --- it. --- --- To summarize the conceptual distinctions: --- * Path --- * Rooted location --- * Absolute --- * Relative --- * Unrooted segment --- --- Independently of the location or segment distinction you can also make the --- distinction between files and directories using the --- "Streamly.Internal.FileSystem.PosixPath.FileDir" module. @File Path@ type --- represents a file whereas @Dir Path@ represents a directory. It provides --- safety against appending a path to a file. Append operation allows appending --- to only 'Dir' types. --- --- You can use the 'Loc', 'Seg' or 'Dir', 'File' types independent of each --- other by using only the required module. If you want both types of --- distinctions then you can use them together as well using the --- "Streamly.Internal.FileSystem.PosixPath.Typed" module. For example, the --- @Loc (Dir Path)@ represents a location which is a directory. You can only --- append to a path that has 'Dir' in it and you can only append a 'Seg' type. --- --- You can choose to use just the basic 'Path' type or any combination of safer --- types. You can upgrade or downgrade the safety using the @adapt@ operation. --- Whenever a less restrictive path type is converted to a more restrictive --- path type, the conversion involves run-time checks and it may fail. However, --- a more restrictive path type can be freely converted to a less restrictive --- one. --- --- Extensible, you can define your own newtype wrappers similar to 'File' or --- 'Dir' to provide custom restrictions if you want. --- --- Any path type can be converted to the 'FilePath' type using the 'toString' --- operation. Operations to convert to and from 'OsPath' type at zero cost are --- provided in the @streamly-filepath@ package. The types use the same --- underlying representation as the 'OsPath' type. --- --- = Developer Notes: +-- == References +-- +-- * https://en.wikipedia.org/wiki/Path_(computing) +-- * https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file +-- * https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-dtyp/62e862f4-2a51-452e-8eeb-dc4ff5ee33cc +-- +-- == Windows and Posix Paths -- -- We should be able to manipulate windows paths on posix and posix paths on -- windows as well. Therefore, we have WindowsPath and PosixPath types which -- are supported on both platforms. However, the Path module aliases Path to -- WindowsPath on Windows and PosixPath on Posix. -- --- Conventions: A trailing separator on a path indicates that it is a --- directory. However, the absence of a trailing separator does not convey any --- information, it could either be a directory or a file. --- --- You may also find the 'str' quasiquoter from "Streamly.Unicode.String" to be --- useful in creating paths. --- --- * https://en.wikipedia.org/wiki/Path_(computing) --- * https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file --- * https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-dtyp/62e862f4-2a51-452e-8eeb-dc4ff5ee33cc --- --- == File System Tree +-- == File System as Tree vs Graph -- -- A file system is a tree when there are no hard links or symbolic links. But -- in the presence of symlinks it could be a DAG or a graph, because directory