diff --git a/core/src/Streamly/Internal/FileSystem/DirIO.hs b/core/src/Streamly/Internal/FileSystem/DirIO.hs index 751f064f2e..f10561c7e3 100644 --- a/core/src/Streamly/Internal/FileSystem/DirIO.hs +++ b/core/src/Streamly/Internal/FileSystem/DirIO.hs @@ -93,7 +93,7 @@ import Streamly.Internal.FileSystem.Windows.ReadDir #else import Streamly.Internal.FileSystem.Posix.ReadDir ( DirStream, openDirStream, closeDirStream, readDirStreamEither - , readEitherChunks) + , readEitherChunks, PathClassified, evaluateUnknown, unClassifyPath) #endif import qualified Streamly.Internal.Data.Stream as S import qualified Streamly.Data.Unfold as UF @@ -238,7 +238,7 @@ toStreamWithBufferOf chunkSize h = AS.concat $ toChunksWithBufferOf chunkSize h {-# INLINE streamEitherReader #-} streamEitherReader :: MonadIO m => - Unfold m DirStream (Either Path Path) + Unfold m DirStream PathClassified streamEitherReader = Unfold step return where @@ -246,11 +246,11 @@ streamEitherReader = Unfold step return r <- liftIO $ readDirStreamEither strm case r of Nothing -> return Stop - Just x -> return $ Yield x strm + Just (x) -> return $ Yield x strm {-# INLINE streamReader #-} streamReader :: MonadIO m => Unfold m DirStream Path -streamReader = fmap (either id id) streamEitherReader +streamReader = fmap unClassifyPath streamEitherReader -- | Read a directory emitting a stream with names of the children. Filter out -- "." and ".." entries. @@ -283,7 +283,12 @@ eitherReader = -- XXX The measured overhead of bracketIO is not noticeable, if it turns -- out to be a problem for small filenames we can use getdents64 to use -- chunked read to avoid the overhead. - UF.bracketIO openDirStream closeDirStream streamEitherReader + UF.bracketIO + (\parent -> (parent,) <$> openDirStream parent) + (\(_, dirStream) -> closeDirStream dirStream) + (UF.mapM2 + (\(parent, _) p -> liftIO (evaluateUnknown parent p)) + (UF.lmap snd streamEitherReader)) {-# INLINE eitherReaderPaths #-} eitherReaderPaths ::(MonadIO m, MonadCatch m) => diff --git a/core/src/Streamly/Internal/FileSystem/Posix/ReadDir.c b/core/src/Streamly/Internal/FileSystem/Posix/ReadDir.c new file mode 100644 index 0000000000..cd482a80e7 --- /dev/null +++ b/core/src/Streamly/Internal/FileSystem/Posix/ReadDir.c @@ -0,0 +1,16 @@ +#include + +int stat_is_directory(const char *path) { + struct stat statbuf; + + // Call stat to get the file status + if (stat(path, &statbuf) == 0) { + // Check if the file is a directory using S_ISDIR macro + if (S_ISDIR(statbuf.st_mode)) { + return 1; // It is a directory + } else { + return 0; // Not a directory + } + } + return -1; // An error occurred (stat failed) +} diff --git a/core/src/Streamly/Internal/FileSystem/Posix/ReadDir.hsc b/core/src/Streamly/Internal/FileSystem/Posix/ReadDir.hsc index fbcf533a25..f0afd94191 100644 --- a/core/src/Streamly/Internal/FileSystem/Posix/ReadDir.hsc +++ b/core/src/Streamly/Internal/FileSystem/Posix/ReadDir.hsc @@ -10,6 +10,9 @@ module Streamly.Internal.FileSystem.Posix.ReadDir ( #if !defined(mingw32_HOST_OS) && !defined(__MINGW32__) DirStream + , PathClassified(..) + , unClassifyPath + , evaluateUnknown , openDirStream , closeDirStream , readDirStreamEither @@ -80,6 +83,15 @@ data {-# CTYPE "struct dirent" #-} CDirent newtype DirStream = DirStream (Ptr CDir) +------------------------------------------------------------------------------- +-- Stat +------------------------------------------------------------------------------- + +foreign import ccall unsafe "stat_is_directory" + c_stat_is_directory :: CString -> IO CInt + +------------------------------------------------------------------------------- +-- Functions ------------------------------------------------------------------------------- foreign import ccall unsafe "closedir" @@ -133,6 +145,42 @@ isMetaDir dname = do then return True else return False +statCheckIfDir :: PosixPath -> IO Bool +statCheckIfDir path = + Array.asCStringUnsafe (Path.toChunk path) $ \cStr -> do + res <- c_stat_is_directory cStr + case res of + x | x == 0 -> pure True + x | x == 1 -> pure False + _ -> throwErrno "checkIfDirectory" + +{-# INLINE appendCString #-} +appendCString :: PosixPath -> CString -> IO PosixPath +appendCString a b = do + b1 <- Array.fromCString (castPtr b) + pure $ Path.append a (Path.unsafeFromChunk b1) + +data PathClassified + = PCDir PosixPath + | PCFile PosixPath + | PCUnknown PosixPath + +unClassifyPath :: PathClassified -> PosixPath +unClassifyPath (PCDir a) = a +unClassifyPath (PCFile a) = a +unClassifyPath (PCUnknown a) = a + +evaluateUnknown + :: PosixPath -> PathClassified -> IO (Either PosixPath PosixPath) +evaluateUnknown _ (PCDir a) = pure $ Left a +evaluateUnknown _ (PCFile a) = pure $ Right a +evaluateUnknown parent (PCUnknown child) = do + statIsDir <- statCheckIfDir $ Path.append parent child + pure + $ if statIsDir + then Left child + else Right child + -- XXX We can use getdents64 directly so that we can use array slices from the -- same buffer that we passed to the OS. That way we can also avoid any -- overhead of bracket. @@ -141,7 +189,7 @@ isMetaDir dname = do -- {-# INLINE readDirStreamEither #-} readDirStreamEither :: -- DirStream -> IO (Either (Rel (Dir Path)) (Rel (File Path))) - DirStream -> IO (Maybe (Either PosixPath PosixPath)) + DirStream -> IO (Maybe PathClassified) readDirStreamEither (DirStream dirp) = loop where @@ -168,8 +216,10 @@ readDirStreamEither (DirStream dirp) = loop isMeta <- isMetaDir dname if isMeta then loop - else return (Just (Left (mkPath name))) - else return (Just (Right (mkPath name))) + else return (Just (PCDir (mkPath name))) + else if (dtype == #const DT_UNKNOWN) + then pure (Just (PCUnknown (mkPath name))) + else return (Just (PCFile (mkPath name))) else do errno <- getErrno if (errno == eINTR) @@ -208,9 +258,6 @@ readEitherChunks alldirs = dirMax = 4 fileMax = 1000 - mkPath :: Array Word8 -> PosixPath - mkPath = Path.unsafeFromChunk - step _ (ChunkStreamInit (x:xs) dirs ndirs files nfiles) = do DirStream dirp <- liftIO $ openDirStream x return $ Skip (ChunkStreamLoop x xs dirp dirs ndirs files nfiles) @@ -233,10 +280,12 @@ readEitherChunks alldirs = dtype :: #{type unsigned char} <- liftIO $ #{peek struct dirent, d_type} dentPtr - name <- Array.fromCString (castPtr dname) - let path = Path.append curdir (mkPath name) - - if (dtype == (#const DT_DIR)) + path <- liftIO $ appendCString curdir dname + statIsDir <- + if dtype == #const DT_UNKNOWN + then liftIO $ statCheckIfDir path + else pure False + if dtype == (#const DT_DIR) || statIsDir then do isMeta <- liftIO $ isMetaDir dname if isMeta @@ -330,9 +379,6 @@ readEitherByteChunks alldirs = -- from the output channel, then consume that stream by using a monad bind. bufSize = 4000 - mkPath :: Array Word8 -> PosixPath - mkPath = Path.unsafeFromChunk - copyToBuf dstArr pos dirPath name = do nameLen <- fmap fromIntegral (liftIO $ c_strlen name) let PosixPath (Array dirArr start end) = dirPath @@ -399,7 +445,11 @@ readEitherByteChunks alldirs = -- XXX Skips come around the entire loop, does that impact perf -- because it has a StreamK in the middle. -- Keep the file check first as it is more likely - if (dtype /= (#const DT_DIR)) + statIsDir <- + if dtype == #const DT_UNKNOWN + then liftIO (appendCString curdir dname >>= statCheckIfDir) + else pure False + if dtype /= (#const DT_DIR) && not statIsDir then do r <- copyToBuf mbarr pos curdir dname case r of @@ -419,9 +469,8 @@ readEitherByteChunks alldirs = if isMeta then return $ Skip st else do - name <- Array.fromCString (castPtr dname) - let path = Path.append curdir (mkPath name) - dirs1 = path : dirs + path <- liftIO $ appendCString curdir dname + let dirs1 = path : dirs ndirs1 = ndirs + 1 r <- copyToBuf mbarr pos curdir dname case r of diff --git a/core/src/Streamly/Internal/FileSystem/Windows/ReadDir.hsc b/core/src/Streamly/Internal/FileSystem/Windows/ReadDir.hsc index d1f8672f53..6e43b9d867 100644 --- a/core/src/Streamly/Internal/FileSystem/Windows/ReadDir.hsc +++ b/core/src/Streamly/Internal/FileSystem/Windows/ReadDir.hsc @@ -10,6 +10,9 @@ module Streamly.Internal.FileSystem.Windows.ReadDir ( #if defined(mingw32_HOST_OS) || defined(__MINGW32__) DirStream + , PathClassified(..) + , unClassifyPath + , evaluateUnknown , openDirStream , closeDirStream , readDirStreamEither @@ -57,6 +60,19 @@ type LPCTSTR = Ptr CWchar type WIN32_FIND_DATA = () type HANDLE = Ptr () +------------------------------------------------------------------------------ +-- Commonization helpers +------------------------------------------------------------------------------ + +type PathClassified = Either WindowsPath WindowsPath + +unClassifyPath :: PathClassified -> WindowsPath +unClassifyPath = either id id + +evaluateUnknown + :: WindowsPath -> PathClassified -> IO (Either WindowsPath WindowsPath) +evaluateUnknown _ = pure + ------------------------------------------------------------------------------ -- Windows C APIs ------------------------------------------------------------------------------ @@ -104,7 +120,7 @@ failWith fn_name err_code = do c_msg <- getErrorMessage err_code msg <- if c_msg == nullPtr then return $ "Error 0x" ++ Numeric.showHex err_code "" - else do + else do msg <- peekCWString c_msg -- We ignore failure of freeing c_msg, given we're already failing _ <- localFree c_msg @@ -145,8 +161,8 @@ openDirStream p = do Array.asCStringUnsafe (Path.toChunk path) $ \pathPtr -> do -- XXX Use getLastError to distinguish the case when no -- matching file is found. See the doc of FindFirstFileW. - failIf - (== iNVALID_HANDLE_VALUE) + failIf + (== iNVALID_HANDLE_VALUE) ("FindFirstFileW: " ++ Path.toString path) $ c_FindFirstFileW (castPtr pathPtr) dataPtr ref <- newIORef True diff --git a/core/streamly-core.cabal b/core/streamly-core.cabal index b8df8a60a2..4639c32fde 100644 --- a/core/streamly-core.cabal +++ b/core/streamly-core.cabal @@ -288,6 +288,7 @@ library , src/Streamly/Internal/Data/Stream c-sources: src/Streamly/Internal/Data/MutArray/Lib.c + , src/Streamly/Internal/FileSystem/Posix/ReadDir.c -- Prefer OS conditionals inside the source files rather than here, -- conditionals here do not work well with cabal2nix.