Skip to content

Commit

Permalink
Make empty stream behavior of splitSepBySeq_ similar to splitSepBy_
Browse files Browse the repository at this point in the history
  • Loading branch information
adithyaov committed Dec 20, 2024
1 parent 17e45a8 commit 4557f89
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 11 deletions.
54 changes: 46 additions & 8 deletions core/src/Streamly/Internal/Data/Stream/Nesting.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2758,12 +2758,15 @@ data SplitOnSeqState mba rb rh ck w fs s b x =

| SplitOnSeqEmpty !fs s

| SplitOnSeqSingle0 !fs s x
| SplitOnSeqSingle !fs s x

| SplitOnSeqWordInit !fs s
| SplitOnSeqWordInit0 !fs s
| SplitOnSeqWordInit Int Word !fs s
| SplitOnSeqWordLoop !w s !fs
| SplitOnSeqWordDone Int !fs !w

| SplitOnSeqKRInit0 Int !fs s mba
| SplitOnSeqKRInit Int !fs s mba
| SplitOnSeqKRLoop fs s mba !rh !ck
| SplitOnSeqKRCheck fs s mba !rh
Expand Down Expand Up @@ -2846,13 +2849,13 @@ splitSepBySeq_ patArr (Fold fstep initial _ final) (Stream step state) =
return $ Skip $ SplitOnSeqEmpty acc state
| patLen == 1 -> do
pat <- liftIO $ A.unsafeIndexIO 0 patArr
return $ Skip $ SplitOnSeqSingle acc state pat
return $ Skip $ SplitOnSeqSingle0 acc state pat
| SIZE_OF(a) * patLen <= sizeOf (Proxy :: Proxy Word) ->
return $ Skip $ SplitOnSeqWordInit acc state
return $ Skip $ SplitOnSeqWordInit0 acc state
| otherwise -> do
(MutArray mba _ _ _) :: MutArray a <-
liftIO $ MutArray.emptyOf patLen
skip $ SplitOnSeqKRInit 0 acc state mba
skip $ SplitOnSeqKRInit0 0 acc state mba
FL.Done b -> skip $ SplitOnSeqYield b SplitOnSeqInit

stepOuter _ (SplitOnSeqYield x next) = return $ Yield x next
Expand Down Expand Up @@ -2892,6 +2895,23 @@ splitSepBySeq_ patArr (Fold fstep initial _ final) (Stream step state) =
-- Single Pattern
-----------------

-- TODO: Commonize the Yield part and check the performance
-- Entry state for a single-element pattern. The machine stays in this state
-- (via Skip) until the input stream produces its first element or stops.
-- If the input stops here, the fold is finalized, its result is discarded and
-- the output stream stops without yielding anything.
stepOuter gst (SplitOnSeqSingle0 fs st pat) = do
res <- step (adaptState gst) st
case res of
Yield x s -> do
-- Once an element has been seen, all continuations use SplitOnSeqSingle.
let jump c = SplitOnSeqSingle c s pat
if pat == x
-- The first element is the separator: finalize the fold and pass the result
-- to yieldReinit. NOTE(review): yieldReinit is defined outside this view;
-- presumably it yields the result and restarts the fold before continuing
-- at the state built by jump - confirm.
then final fs >>= yieldReinit jump
else do
-- Not the separator: feed the element to the fold.
r <- fstep fs x
case r of
FL.Partial fs1 ->
pure $ Skip $ SplitOnSeqSingle fs1 s pat
-- The fold terminated on its own: hand its result to yieldReinit.
FL.Done b -> yieldReinit jump b
-- No element yet: remain in the entry state with the new stream state.
Skip s -> pure $ Skip $ SplitOnSeqSingle0 fs s pat
-- Input ended before any element was produced: run final for its effects
-- only and stop, so nothing is emitted.
Stop -> final fs >> pure Stop

stepOuter gst (SplitOnSeqSingle fs0 st0 pat) = do
go SPEC fs0 st0

Expand Down Expand Up @@ -2938,8 +2958,17 @@ splitSepBySeq_ patArr (Fold fstep initial _ final) (Stream step state) =
let jump c = SplitOnSeqWordDone (n - 1) c wrd
yieldReinit jump b

stepOuter gst (SplitOnSeqWordInit fs st0) =
go SPEC 0 0 st0
-- Entry state for a multi-element pattern that fits in a Word. The machine
-- stays in this state (via Skip) until the input stream produces its first
-- element or stops. If the input stops here, the fold is finalized, its
-- result is discarded and the output stream stops without yielding anything.
stepOuter gst (SplitOnSeqWordInit0 fs st) = do
res <- step (adaptState gst) st
case res of
Yield x s ->
-- Start accumulating from an empty word (0) and continue in
-- SplitOnSeqWordInit with index 1. The element is not passed to the fold
-- here; the fold state fs is forwarded unchanged.
let wrd1 = addToWord 0 x
in pure $ Skip $ SplitOnSeqWordInit 1 wrd1 fs s
-- No element yet: remain in the entry state with the new stream state.
Skip s -> pure $ Skip $ SplitOnSeqWordInit0 fs s
-- Input ended before any element was produced: run final for its effects
-- only and stop, so nothing is emitted.
Stop -> final fs >> pure Stop

stepOuter gst (SplitOnSeqWordInit idx0 wrd0 fs st0) =
go SPEC idx0 wrd0 st0

where

Expand All @@ -2953,7 +2982,7 @@ splitSepBySeq_ patArr (Fold fstep initial _ final) (Stream step state) =
then do
if wrd1 .&. wordMask == wordPat
then do
let jump c = SplitOnSeqWordInit c s
let jump c = SplitOnSeqWordInit 0 0 c s
final fs >>= yieldReinit jump
else skip $ SplitOnSeqWordLoop wrd1 s fs
else go SPEC (idx + 1) wrd1 s
Expand All @@ -2977,7 +3006,7 @@ splitSepBySeq_ patArr (Fold fstep initial _ final) (Stream step state) =
res <- step (adaptState gst) st
case res of
Yield x s -> do
let jump c = SplitOnSeqWordInit c s
let jump c = SplitOnSeqWordInit 0 0 c s
wrd1 = addToWord wrd x
old = (wordMask .&. wrd)
`shiftR` (elemBits * (patLen - 1))
Expand All @@ -3000,6 +3029,15 @@ splitSepBySeq_ patArr (Fold fstep initial _ final) (Stream step state) =
-- manipulated locally e.g. we are passing only mba, here and build an
-- array using patLen and arrStart from the surrounding context.

-- Entry state for the Rabin-Karp path. The machine stays in this state (via
-- Skip) until the input stream produces its first element or stops. If the
-- input stops here, the fold is finalized, its result is discarded and the
-- output stream stops without yielding anything.
stepOuter gst (SplitOnSeqKRInit0 offset fs st mba) = do
res <- step (adaptState gst) st
case res of
Yield x s -> do
-- Store the first element in the buffer at the current offset, then
-- continue in SplitOnSeqKRInit with the offset advanced by one element size.
liftIO $ pokeAt offset mba x
skip $ SplitOnSeqKRInit (offset + SIZE_OF(a)) fs s mba
-- No element yet: remain in the entry state with the new stream state.
Skip s -> skip $ SplitOnSeqKRInit0 offset fs s mba
-- Input ended before any element was produced: run final for its effects
-- only and stop, so nothing is emitted.
Stop -> final fs >> pure Stop

stepOuter gst (SplitOnSeqKRInit offset fs st mba) = do
res <- step (adaptState gst) st
case res of
Expand Down
17 changes: 14 additions & 3 deletions test/Streamly/Test/Data/Stream.hs
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,12 @@ splitOnSeq ::
splitOnSeq op = do
describe "Tests for splitOnSeq" $ do
-- Empty pattern case
it "splitOnSeq_ \"\" \"\" = []"
$ splitOnSeq_ "" "" `shouldReturn` []

-- Single element pattern cases
it "splitOnSeq_ \"x\" \"\" = []"
$ splitOnSeq_ "x" "" `shouldReturn` []
it "splitOnSeq_ \"x\" \"hello\" = [\"hello\"]"
$ splitOnSeq_ "x" "hello" `shouldReturn` ["hello"]
it "splitOnSeq_ \"h\" \"hello\" = [\"\", \"ello\"]"
Expand All @@ -117,6 +121,8 @@ splitOnSeq op = do
$ splitOnSeq_ "o" "hello" `shouldReturn` ["hell", ""]

-- multi-element pattern fitting in a Word
it "splitOnSeq_ \"he\" \"\" = []"
$ splitOnSeq_ "he" "" `shouldReturn` []
it "splitOnSeq_ \"he\" \"hello\" = [\"\", \"llo\"]"
$ splitOnSeq_ "he" "hello" `shouldReturn` ["", "llo"]
it "splitOnSeq_ \"ll\" \"hello\" = [\"he\", \"o\"]"
Expand All @@ -125,8 +131,8 @@ splitOnSeq op = do
$ splitOnSeq_ "lo" "hello" `shouldReturn` ["hel", ""]

-- multi-element pattern - Rabin-Karp cases
it "splitOnSeq_ \"hello\" \"\" = [\"\"]"
$ splitOnSeq_ "hello" "" `shouldReturn` [""]
it "splitOnSeq_ \"hello\" \"\" = []"
$ splitOnSeq_ "hello" "" `shouldReturn` []
it "splitOnSeq_ \"hel\" \"hello\" = [\"\", \"lo\"]"
$ splitOnSeq_ "hel" "hello" `shouldReturn` ["", "lo"]
it "splitOnSeq_ \"ell\" \"hello\" = [\"h\", \"o\"]"
Expand Down Expand Up @@ -403,7 +409,12 @@ intercalateSplitOnId x desc =
groupSplitOps :: String -> Spec
groupSplitOps desc = do
-- splitting
splitOnSeq splitOnSeqFold

-- The foldManyPost implementation on an empty stream produces a single
-- value, whereas the direct stream implementation produces none. Because
-- the two implementations now behave differently on an empty stream, the
-- fold-based variant is not run against these tests.
-- splitOnSeq splitOnSeqFold

splitOnSeq splitOnSeqStream
splitOnSuffixSeq splitOnSuffixSeqFold

Expand Down

0 comments on commit 4557f89

Please sign in to comment.