Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make empty stream behavior of splitSepBySeq_ similar to splitSepBy_ #2912

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 46 additions & 8 deletions core/src/Streamly/Internal/Data/Stream/Nesting.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2758,12 +2758,15 @@ data SplitOnSeqState mba rb rh ck w fs s b x =

| SplitOnSeqEmpty !fs s

| SplitOnSeqSingle0 !fs s x
| SplitOnSeqSingle !fs s x

| SplitOnSeqWordInit !fs s
| SplitOnSeqWordInit0 !fs s
| SplitOnSeqWordInit Int Word !fs s
| SplitOnSeqWordLoop !w s !fs
| SplitOnSeqWordDone Int !fs !w

| SplitOnSeqKRInit0 Int !fs s mba
| SplitOnSeqKRInit Int !fs s mba
| SplitOnSeqKRLoop fs s mba !rh !ck
| SplitOnSeqKRCheck fs s mba !rh
Expand Down Expand Up @@ -2846,13 +2849,13 @@ splitSepBySeq_ patArr (Fold fstep initial _ final) (Stream step state) =
return $ Skip $ SplitOnSeqEmpty acc state
| patLen == 1 -> do
pat <- liftIO $ A.unsafeIndexIO 0 patArr
return $ Skip $ SplitOnSeqSingle acc state pat
return $ Skip $ SplitOnSeqSingle0 acc state pat
| SIZE_OF(a) * patLen <= sizeOf (Proxy :: Proxy Word) ->
return $ Skip $ SplitOnSeqWordInit acc state
return $ Skip $ SplitOnSeqWordInit0 acc state
| otherwise -> do
(MutArray mba _ _ _) :: MutArray a <-
liftIO $ MutArray.emptyOf patLen
skip $ SplitOnSeqKRInit 0 acc state mba
skip $ SplitOnSeqKRInit0 0 acc state mba
FL.Done b -> skip $ SplitOnSeqYield b SplitOnSeqInit

stepOuter _ (SplitOnSeqYield x next) = return $ Yield x next
Expand Down Expand Up @@ -2892,6 +2895,23 @@ splitSepBySeq_ patArr (Fold fstep initial _ final) (Stream step state) =
-- Single Pattern
-----------------

-- TODO: Commonize the Yield part and check the performance
stepOuter gst (SplitOnSeqSingle0 fs st pat) = do
res <- step (adaptState gst) st
case res of
Yield x s -> do
let jump c = SplitOnSeqSingle c s pat
if pat == x
then final fs >>= yieldReinit jump
else do
r <- fstep fs x
case r of
FL.Partial fs1 ->
pure $ Skip $ SplitOnSeqSingle fs1 s pat
FL.Done b -> yieldReinit jump b
Skip s -> pure $ Skip $ SplitOnSeqSingle0 fs s pat
Stop -> final fs >> pure Stop

stepOuter gst (SplitOnSeqSingle fs0 st0 pat) = do
go SPEC fs0 st0

Expand Down Expand Up @@ -2938,8 +2958,17 @@ splitSepBySeq_ patArr (Fold fstep initial _ final) (Stream step state) =
let jump c = SplitOnSeqWordDone (n - 1) c wrd
yieldReinit jump b

stepOuter gst (SplitOnSeqWordInit fs st0) =
go SPEC 0 0 st0
stepOuter gst (SplitOnSeqWordInit0 fs st) = do
res <- step (adaptState gst) st
case res of
Yield x s ->
let wrd1 = addToWord 0 x
in pure $ Skip $ SplitOnSeqWordInit 1 wrd1 fs s
Skip s -> pure $ Skip $ SplitOnSeqWordInit0 fs s
Stop -> final fs >> pure Stop

stepOuter gst (SplitOnSeqWordInit idx0 wrd0 fs st0) =
go SPEC idx0 wrd0 st0

where

Expand All @@ -2953,7 +2982,7 @@ splitSepBySeq_ patArr (Fold fstep initial _ final) (Stream step state) =
then do
if wrd1 .&. wordMask == wordPat
then do
let jump c = SplitOnSeqWordInit c s
let jump c = SplitOnSeqWordInit 0 0 c s
final fs >>= yieldReinit jump
else skip $ SplitOnSeqWordLoop wrd1 s fs
else go SPEC (idx + 1) wrd1 s
Expand All @@ -2977,7 +3006,7 @@ splitSepBySeq_ patArr (Fold fstep initial _ final) (Stream step state) =
res <- step (adaptState gst) st
case res of
Yield x s -> do
let jump c = SplitOnSeqWordInit c s
let jump c = SplitOnSeqWordInit 0 0 c s
wrd1 = addToWord wrd x
old = (wordMask .&. wrd)
`shiftR` (elemBits * (patLen - 1))
Expand All @@ -3000,6 +3029,15 @@ splitSepBySeq_ patArr (Fold fstep initial _ final) (Stream step state) =
-- manipulated locally e.g. we are passing only mba, here and build an
-- array using patLen and arrStart from the surrounding context.

stepOuter gst (SplitOnSeqKRInit0 offset fs st mba) = do
res <- step (adaptState gst) st
case res of
Yield x s -> do
liftIO $ pokeAt offset mba x
skip $ SplitOnSeqKRInit (offset + SIZE_OF(a)) fs s mba
Skip s -> skip $ SplitOnSeqKRInit0 offset fs s mba
Stop -> final fs >> pure Stop

stepOuter gst (SplitOnSeqKRInit offset fs st mba) = do
res <- step (adaptState gst) st
case res of
Expand Down
17 changes: 14 additions & 3 deletions test/Streamly/Test/Data/Stream.hs
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,12 @@ splitOnSeq ::
splitOnSeq op = do
describe "Tests for splitOnSeq" $ do
-- Empty pattern case
it "splitOnSeq_ \"\" \"\" = []"
$ splitOnSeq_ "" "" `shouldReturn` []

-- Single element pattern cases
it "splitOnSeq_ \"x\" \"\" = []"
$ splitOnSeq_ "x" "" `shouldReturn` []
it "splitOnSeq_ \"x\" \"hello\" = [\"hello\"]"
$ splitOnSeq_ "x" "hello" `shouldReturn` ["hello"]
it "splitOnSeq_ \"h\" \"hello\" = [\"\", \"ello\"]"
Expand All @@ -117,6 +121,8 @@ splitOnSeq op = do
$ splitOnSeq_ "o" "hello" `shouldReturn` ["hell", ""]

-- multi-element pattern fitting in a Word
it "splitOnSeq_ \"he\" \"\" = []"
$ splitOnSeq_ "he" "" `shouldReturn` []
it "splitOnSeq_ \"he\" \"hello\" = [\"\", \"llo\"]"
$ splitOnSeq_ "he" "hello" `shouldReturn` ["", "llo"]
it "splitOnSeq_ \"ll\" \"hello\" = [\"he\", \"o\"]"
Expand All @@ -125,8 +131,8 @@ splitOnSeq op = do
$ splitOnSeq_ "lo" "hello" `shouldReturn` ["hel", ""]

-- multi-element pattern - Rabin-Karp cases
it "splitOnSeq_ \"hello\" \"\" = [\"\"]"
$ splitOnSeq_ "hello" "" `shouldReturn` [""]
it "splitOnSeq_ \"hello\" \"\" = []"
$ splitOnSeq_ "hello" "" `shouldReturn` []
it "splitOnSeq_ \"hel\" \"hello\" = [\"\", \"lo\"]"
$ splitOnSeq_ "hel" "hello" `shouldReturn` ["", "lo"]
it "splitOnSeq_ \"ell\" \"hello\" = [\"h\", \"o\"]"
Expand Down Expand Up @@ -403,7 +409,12 @@ intercalateSplitOnId x desc =
groupSplitOps :: String -> Spec
groupSplitOps desc = do
-- splitting
splitOnSeq splitOnSeqFold

-- The foldManyPost implementation on an empty stream produces a single
-- value. The behaviour of foldManyPost implementation and the direct stream
-- implementation is not different.
-- splitOnSeq splitOnSeqFold

splitOnSeq splitOnSeqStream
splitOnSuffixSeq splitOnSuffixSeqFold

Expand Down
Loading