From 4557f898fb7f9ec50d9f6ab94dc377504336dba8 Mon Sep 17 00:00:00 2001 From: Adithya Kumar Date: Sat, 21 Dec 2024 00:58:58 +0530 Subject: [PATCH] Make empty stream behavior of splitSepBySeq_ similar to splitSepBy_ --- .../Streamly/Internal/Data/Stream/Nesting.hs | 54 ++++++++++++++++--- test/Streamly/Test/Data/Stream.hs | 17 ++++-- 2 files changed, 60 insertions(+), 11 deletions(-) diff --git a/core/src/Streamly/Internal/Data/Stream/Nesting.hs b/core/src/Streamly/Internal/Data/Stream/Nesting.hs index e02af947aa..575c8be9f3 100644 --- a/core/src/Streamly/Internal/Data/Stream/Nesting.hs +++ b/core/src/Streamly/Internal/Data/Stream/Nesting.hs @@ -2758,12 +2758,15 @@ data SplitOnSeqState mba rb rh ck w fs s b x = | SplitOnSeqEmpty !fs s + | SplitOnSeqSingle0 !fs s x | SplitOnSeqSingle !fs s x - | SplitOnSeqWordInit !fs s + | SplitOnSeqWordInit0 !fs s + | SplitOnSeqWordInit Int Word !fs s | SplitOnSeqWordLoop !w s !fs | SplitOnSeqWordDone Int !fs !w + | SplitOnSeqKRInit0 Int !fs s mba | SplitOnSeqKRInit Int !fs s mba | SplitOnSeqKRLoop fs s mba !rh !ck | SplitOnSeqKRCheck fs s mba !rh @@ -2846,13 +2849,13 @@ splitSepBySeq_ patArr (Fold fstep initial _ final) (Stream step state) = return $ Skip $ SplitOnSeqEmpty acc state | patLen == 1 -> do pat <- liftIO $ A.unsafeIndexIO 0 patArr - return $ Skip $ SplitOnSeqSingle acc state pat + return $ Skip $ SplitOnSeqSingle0 acc state pat | SIZE_OF(a) * patLen <= sizeOf (Proxy :: Proxy Word) -> - return $ Skip $ SplitOnSeqWordInit acc state + return $ Skip $ SplitOnSeqWordInit0 acc state | otherwise -> do (MutArray mba _ _ _) :: MutArray a <- liftIO $ MutArray.emptyOf patLen - skip $ SplitOnSeqKRInit 0 acc state mba + skip $ SplitOnSeqKRInit0 0 acc state mba FL.Done b -> skip $ SplitOnSeqYield b SplitOnSeqInit stepOuter _ (SplitOnSeqYield x next) = return $ Yield x next @@ -2892,6 +2895,23 @@ splitSepBySeq_ patArr (Fold fstep initial _ final) (Stream step state) = -- Single Pattern ----------------- + -- TODO: Commonize the Yield 
part and check the performance + stepOuter gst (SplitOnSeqSingle0 fs st pat) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + let jump c = SplitOnSeqSingle c s pat + if pat == x + then final fs >>= yieldReinit jump + else do + r <- fstep fs x + case r of + FL.Partial fs1 -> + pure $ Skip $ SplitOnSeqSingle fs1 s pat + FL.Done b -> yieldReinit jump b + Skip s -> pure $ Skip $ SplitOnSeqSingle0 fs s pat + Stop -> final fs >> pure Stop + stepOuter gst (SplitOnSeqSingle fs0 st0 pat) = do go SPEC fs0 st0 @@ -2938,8 +2958,17 @@ splitSepBySeq_ patArr (Fold fstep initial _ final) (Stream step state) = let jump c = SplitOnSeqWordDone (n - 1) c wrd yieldReinit jump b - stepOuter gst (SplitOnSeqWordInit fs st0) = - go SPEC 0 0 st0 + stepOuter gst (SplitOnSeqWordInit0 fs st) = do + res <- step (adaptState gst) st + case res of + Yield x s -> + let wrd1 = addToWord 0 x + in pure $ Skip $ SplitOnSeqWordInit 1 wrd1 fs s + Skip s -> pure $ Skip $ SplitOnSeqWordInit0 fs s + Stop -> final fs >> pure Stop + + stepOuter gst (SplitOnSeqWordInit idx0 wrd0 fs st0) = + go SPEC idx0 wrd0 st0 where @@ -2953,7 +2982,7 @@ splitSepBySeq_ patArr (Fold fstep initial _ final) (Stream step state) = then do if wrd1 .&. wordMask == wordPat then do - let jump c = SplitOnSeqWordInit c s + let jump c = SplitOnSeqWordInit 0 0 c s final fs >>= yieldReinit jump else skip $ SplitOnSeqWordLoop wrd1 s fs else go SPEC (idx + 1) wrd1 s @@ -2977,7 +3006,7 @@ splitSepBySeq_ patArr (Fold fstep initial _ final) (Stream step state) = res <- step (adaptState gst) st case res of Yield x s -> do - let jump c = SplitOnSeqWordInit c s + let jump c = SplitOnSeqWordInit 0 0 c s wrd1 = addToWord wrd x old = (wordMask .&. wrd) `shiftR` (elemBits * (patLen - 1)) @@ -3000,6 +3029,15 @@ splitSepBySeq_ patArr (Fold fstep initial _ final) (Stream step state) = -- manipulated locally e.g. we are passing only mba, here and build an -- array using patLen and arrStart from the surrounding context. 
+ stepOuter gst (SplitOnSeqKRInit0 offset fs st mba) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + liftIO $ pokeAt offset mba x + skip $ SplitOnSeqKRInit (offset + SIZE_OF(a)) fs s mba + Skip s -> skip $ SplitOnSeqKRInit0 offset fs s mba + Stop -> final fs >> pure Stop + stepOuter gst (SplitOnSeqKRInit offset fs st mba) = do res <- step (adaptState gst) st case res of diff --git a/test/Streamly/Test/Data/Stream.hs b/test/Streamly/Test/Data/Stream.hs index c19338c565..7bd60b5196 100644 --- a/test/Streamly/Test/Data/Stream.hs +++ b/test/Streamly/Test/Data/Stream.hs @@ -103,8 +103,12 @@ splitOnSeq :: splitOnSeq op = do describe "Tests for splitOnSeq" $ do -- Empty pattern case + it "splitOnSeq_ \"\" \"\" = []" + $ splitOnSeq_ "" "" `shouldReturn` [] -- Single element pattern cases + it "splitOnSeq_ \"x\" \"\" = []" + $ splitOnSeq_ "x" "" `shouldReturn` [] it "splitOnSeq_ \"x\" \"hello\" = [\"hello\"]" $ splitOnSeq_ "x" "hello" `shouldReturn` ["hello"] it "splitOnSeq_ \"h\" \"hello\" = [\"\", \"ello\"]" @@ -117,6 +121,8 @@ splitOnSeq op = do $ splitOnSeq_ "o" "hello" `shouldReturn` ["hell", ""] -- multi-element pattern fitting in a Word + it "splitOnSeq_ \"he\" \"\" = []" + $ splitOnSeq_ "he" "" `shouldReturn` [] it "splitOnSeq_ \"he\" \"hello\" = [\"\", \"llo\"]" $ splitOnSeq_ "he" "hello" `shouldReturn` ["", "llo"] it "splitOnSeq_ \"ll\" \"hello\" = [\"he\", \"o\"]" @@ -125,8 +131,8 @@ splitOnSeq op = do $ splitOnSeq_ "lo" "hello" `shouldReturn` ["hel", ""] -- multi-element pattern - Rabin-Karp cases - it "splitOnSeq_ \"hello\" \"\" = [\"\"]" - $ splitOnSeq_ "hello" "" `shouldReturn` [""] + it "splitOnSeq_ \"hello\" \"\" = []" + $ splitOnSeq_ "hello" "" `shouldReturn` [] it "splitOnSeq_ \"hel\" \"hello\" = [\"\", \"lo\"]" $ splitOnSeq_ "hel" "hello" `shouldReturn` ["", "lo"] it "splitOnSeq_ \"ell\" \"hello\" = [\"h\", \"o\"]" @@ -403,7 +409,12 @@ intercalateSplitOnId x desc = groupSplitOps :: String -> Spec groupSplitOps desc = do -- 
splitting - splitOnSeq splitOnSeqFold + + -- The foldManyPost implementation on an empty stream produces a single + -- value. The behaviour of foldManyPost implementation and the direct stream + -- implementation is now different. + -- splitOnSeq splitOnSeqFold + splitOnSeq splitOnSeqStream splitOnSuffixSeq splitOnSuffixSeqFold