From b81862d9fff8ab57fbdbc7edc0bea0c11462d806 Mon Sep 17 00:00:00 2001 From: Adithya Kumar Date: Thu, 5 Dec 2024 20:39:07 +0530 Subject: [PATCH] Add a routine for path normalization --- .../Internal/FileSystem/Path/Common.hs | 55 +++++++++++++++++- .../Streamly/Internal/FileSystem/PosixPath.hs | 47 +++++++++++++++ streamly.cabal | 1 + test/Streamly/Test/FileSystem/Path.hs | 58 +++++++++++++++++++ test/streamly-tests.cabal | 6 ++ 5 files changed, 166 insertions(+), 1 deletion(-) create mode 100644 test/Streamly/Test/FileSystem/Path.hs diff --git a/core/src/Streamly/Internal/FileSystem/Path/Common.hs b/core/src/Streamly/Internal/FileSystem/Path/Common.hs index 1c0767ecb4..8b3200b036 100644 --- a/core/src/Streamly/Internal/FileSystem/Path/Common.hs +++ b/core/src/Streamly/Internal/FileSystem/Path/Common.hs @@ -24,6 +24,9 @@ module Streamly.Internal.FileSystem.Path.Common , toString , toChars + -- * Conversion + , normalize + -- * Operations , primarySeparator , isSeparator @@ -45,6 +48,7 @@ where import Control.Monad.Catch (MonadThrow(..)) import Data.Char (ord, isAlpha) +import Data.Function ((&)) import Data.Functor.Identity (Identity(..)) #ifdef DEBUG import Data.Maybe (fromJust) @@ -54,7 +58,7 @@ import GHC.Base (unsafeChr) import Language.Haskell.TH (Q, Exp) import Language.Haskell.TH.Quote (QuasiQuoter (..)) import Streamly.Internal.Data.Array (Array(..)) -import Streamly.Internal.Data.MutByteArray (Unbox) +import Streamly.Internal.Data.MutByteArray (Unbox(..)) import Streamly.Internal.Data.Path (PathException(..)) import Streamly.Internal.Data.Stream (Stream) import System.IO.Unsafe (unsafePerformIO) @@ -367,3 +371,52 @@ append :: (Unbox a, Integral a) => OS -> (Array a -> String) -> Array a -> Array a -> Array a append os toStr a b = withAppendCheck os toStr b (doAppend os a b) + +{-# INLINE normalize #-} +normalize :: forall a. (Unbox a, Integral a) => OS -> Array a -> Array a +normalize os arr = + if arrElemLen == 1 + then arr + else Array.unsafeFreeze $ unsafePerformIO $ do + let workSliceMut = Array.unsafeThaw workSlice + workSliceStream = MutArray.read workSliceMut + (mid :: MutArray.MutArray a) <- + Stream.indexOnSuffix (== sepElem) workSliceStream + & Stream.filter (not . shouldFilterOut) + & fmap (\(i, len) -> getSliceWithSepSuffix i len workSliceMut) + & Stream.fold (Fold.foldlM' MutArray.unsafeSplice initBufferM) + if startsWithDotSlash && MutArray.length mid == 0 + then MutArray.fromListN 2 [fstElem, sndElem] + else pure mid + + where + + sepElem = fromIntegral (ord (primarySeparator os)) + dotElem = fromIntegral (ord '.') + arrElemLen = Array.length arr + + fstElem = Array.getIndexUnsafe 0 arr + sndElem = Array.getIndexUnsafe 1 arr + + startsWithSep = fstElem == sepElem + startsWithDotSlash = fstElem == dotElem && sndElem == sepElem + + workSlice + | startsWithSep = Array.getSliceUnsafe 1 (arrElemLen - 1) arr + | startsWithDotSlash = Array.getSliceUnsafe 2 (arrElemLen - 2) arr + | otherwise = arr + workSliceElemLen = Array.length workSlice + + shouldFilterOut (off, len) = + len == 0 || + (len == 1 && Array.getIndexUnsafe off workSlice == dotElem) + + getSliceWithSepSuffix i len + | i + len == workSliceElemLen = MutArray.unsafeGetSlice i len + getSliceWithSepSuffix i len = MutArray.unsafeGetSlice i (len + 1) + + initBufferM = do + (newArr :: MutArray.MutArray a) <- MutArray.emptyOf arrElemLen + if startsWithSep + then MutArray.unsafeSnoc newArr fstElem + else pure newArr diff --git a/core/src/Streamly/Internal/FileSystem/PosixPath.hs b/core/src/Streamly/Internal/FileSystem/PosixPath.hs index 3c7ba4392f..513bf29f2a 100644 --- a/core/src/Streamly/Internal/FileSystem/PosixPath.hs +++ b/core/src/Streamly/Internal/FileSystem/PosixPath.hs @@ -41,6 +41,7 @@ module Streamly.Internal.FileSystem.OS_PATH -- * Conversions , IsPath (..) , adapt + , normalize -- * Construction , fromChunk @@ -360,3 +361,49 @@ append (OS_PATH a) (OS_PATH b) = OS_PATH $ Common.append Common.OS_NAME (Common.toString Unicode.UNICODE_DECODER) a b + +-- | Normalize the path. +-- +-- The behaviour is similar to FilePath.normalise. +-- +-- >>> Path.toString $ Path.normalize $ [path|/file/\test////|] +-- "/file/\\test/" +-- +-- >>> Path.toString $ Path.normalize $ [path|/file/./test|] +-- "/file/test" +-- +-- >>> Path.toString $ Path.normalize $ [path|/test/file/../bob/fred/|] +-- "/test/file/../bob/fred/" +-- +-- >>> Path.toString $ Path.normalize $ [path|../bob/fred/|] +-- "../bob/fred/" +-- +-- >>> Path.toString $ Path.normalize $ [path|/a/../c|] +-- "/a/../c" +-- +-- >>> Path.toString $ Path.normalize $ [path|./bob/fred/|] +-- "bob/fred/" +-- +-- >>> Path.toString $ Path.normalize $ [path|.|] +-- "." +-- +-- >>> Path.toString $ Path.normalize $ [path|./|] +-- "./" +-- +-- >>> Path.toString $ Path.normalize $ [path|./.|] +-- "./" +-- +-- >>> Path.toString $ Path.normalize $ [path|/./|] +-- "/" +-- +-- >>> Path.toString $ Path.normalize $ [path|/|] +-- "/" +-- +-- >>> Path.toString $ Path.normalize $ [path|bob/fred/.|] +-- "bob/fred/" +-- +-- >>> Path.toString $ Path.normalize $ [path|//home|] +-- "/home" +-- +normalize :: OS_PATH -> OS_PATH +normalize (OS_PATH a) = OS_PATH $ Common.normalize Common.OS_NAME a diff --git a/streamly.cabal b/streamly.cabal index 6c7295e28b..ca092d096b 100644 --- a/streamly.cabal +++ b/streamly.cabal @@ -127,6 +127,7 @@ extra-source-files: test/Streamly/Test/FileSystem/Event/Windows.hs test/Streamly/Test/FileSystem/Event/Linux.hs test/Streamly/Test/FileSystem/Handle.hs + test/Streamly/Test/FileSystem/Path.hs test/Streamly/Test/Network/Socket.hs test/Streamly/Test/Network/Inet/TCP.hs test/Streamly/Test/Prelude.hs diff --git a/test/Streamly/Test/FileSystem/Path.hs b/test/Streamly/Test/FileSystem/Path.hs new file mode 100644 index 0000000000..5b525b4f1c --- /dev/null +++ b/test/Streamly/Test/FileSystem/Path.hs @@ -0,0 +1,58 @@ +-- | +-- Module : Streamly.Test.FileSystem.Path +-- Copyright : (c) 2021 Composewell Technologies +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com +-- Stability : experimental +-- Portability : GHC +-- + +module Streamly.Test.FileSystem.Path (main) where + +import qualified System.FilePath as FilePath +import qualified Streamly.Internal.FileSystem.Path as Path + +import Test.Hspec as H + +moduleName :: String +moduleName = "FileSystem.Path" + +testNormalize :: String -> Spec +testNormalize inp = + it ("normalize: " ++ show inp) $ do + p <- Path.fromString inp + let expected = FilePath.normalise inp + got = Path.toString (Path.normalize p) + got `shouldBe` expected + +main :: IO () +main = + hspec $ + H.parallel $ + describe moduleName $ do + describe "normalize" $ do + -- Primarily for Windows + testNormalize "c:\\file/bob\\" + testNormalize "c:\\" + testNormalize "c:\\\\\\\\" + testNormalize "C:.\\" + testNormalize "\\\\server\\test" + testNormalize "//server/test" + testNormalize "c:/file" + testNormalize "/file" + testNormalize "\\" + -- Primarily for Posix + testNormalize "/./" + testNormalize "/file/\\test////" + testNormalize "/file/./test" + testNormalize "/test/file/../bob/fred/" + testNormalize "../bob/fred/" + testNormalize "/a/../c" + testNormalize "./bob/fred/" + testNormalize "." + testNormalize "./" + testNormalize "./." + testNormalize "/./" + testNormalize "/" + testNormalize "bob/fred/." + testNormalize "//home" diff --git a/test/streamly-tests.cabal b/test/streamly-tests.cabal index bef27c55f4..4564d2a87b 100644 --- a/test/streamly-tests.cabal +++ b/test/streamly-tests.cabal @@ -445,6 +445,12 @@ test-suite FileSystem.Handle if flag(use-streamly-core) buildable: False +test-suite FileSystem.Path + import: test-options + type: exitcode-stdio-1.0 + main-is: Streamly/Test/FileSystem/Path.hs + ghc-options: -main-is Streamly.Test.FileSystem.Path.main + test-suite Network.Inet.TCP import: lib-options type: exitcode-stdio-1.0