Skip to content

Commit

Permalink
Track the absolute position in the drivers of Parser
Browse files Browse the repository at this point in the history
  • Loading branch information
adithyaov committed Oct 11, 2024
1 parent ddfe7ae commit 0719551
Show file tree
Hide file tree
Showing 17 changed files with 363 additions and 354 deletions.
2 changes: 1 addition & 1 deletion benchmark/Streamly/Benchmark/Data/Parser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -709,7 +709,7 @@ moduleName = "Data.Parser"

instance NFData ParseError where
{-# INLINE rnf #-}
rnf (ParseError x) = rnf x
rnf (ParseError i x) = rnf i `seq` rnf x

o_1_space_serial :: Int -> [Benchmark]
o_1_space_serial value =
Expand Down
2 changes: 1 addition & 1 deletion benchmark/Streamly/Benchmark/Data/ParserK.hs
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ moduleName = MODULE_NAME

instance NFData ParseError where
{-# INLINE rnf #-}
rnf (ParseError x) = rnf x
rnf (ParseError i x) = rnf i `seq` rnf x

o_1_space_serial :: Int -> [Benchmark]
o_1_space_serial value =
Expand Down
2 changes: 1 addition & 1 deletion benchmark/Streamly/Benchmark/Unicode/Parser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ moduleName = "Unicode.Parser"

instance NFData ParseError where
{-# INLINE rnf #-}
rnf (ParseError x) = rnf x
rnf (ParseError i x) = rnf i `seq` rnf x

o_n_heap_serial :: Int -> [Benchmark]
o_n_heap_serial value =
Expand Down
48 changes: 24 additions & 24 deletions core/src/Streamly/Internal/Data/Array.hs
Original file line number Diff line number Diff line change
Expand Up @@ -907,9 +907,9 @@ parseBreakChunksK ::
parseBreakChunksK (Parser pstep initial extract) stream = do
res <- initial
case res of
IPartial s -> go s stream []
IPartial s -> go s stream [] 0
IDone b -> return (Right b, stream)
IError err -> return (Left (ParseError err), stream)
IError err -> return (Left (ParseError 0 err), stream)

where

Expand All @@ -919,37 +919,37 @@ parseBreakChunksK (Parser pstep initial extract) stream = do
-- XXX currently we are using a dumb list based approach for backtracking
-- buffer. This can be replaced by a sliding/ring buffer using Data.Array.
-- That will allow us more efficient random back and forth movement.
go !pst st backBuf = do
let stop = goStop pst backBuf -- (, K.nil) <$> extract pst
go !pst st backBuf i = do
let stop = goStop pst backBuf i -- (, K.nil) <$> extract pst
single a = yieldk a StreamK.nil
yieldk arr r = goArray pst backBuf r arr
yieldk arr r = goArray pst backBuf r arr i
in StreamK.foldStream defState yieldk single stop st

-- Use strictness on "cur" to keep it unboxed
goArray !pst backBuf st (Array _ cur end) | cur == end = go pst st backBuf
goArray !pst backBuf st (Array contents cur end) = do
goArray !pst backBuf st (Array _ cur end) i | cur == end = go pst st backBuf i
goArray !pst backBuf st (Array contents cur end) i = do
x <- liftIO $ peekAt cur contents
pRes <- pstep pst x
let next = INDEX_NEXT(cur,a)
case pRes of
Parser.Partial 0 s ->
goArray s [] st (Array contents next end)
goArray s [] st (Array contents next end) (i + 1)
Parser.Partial n s -> do
assert (n <= Prelude.length (x:backBuf)) (return ())
let src0 = Prelude.take n (x:backBuf)
arr0 = fromListN n (Prelude.reverse src0)
arr1 = Array contents next end
src = arr0 <> arr1
goArray s [] st src
goArray s [] st src (i + 1 - n)
Parser.Continue 0 s ->
goArray s (x:backBuf) st (Array contents next end)
goArray s (x:backBuf) st (Array contents next end) (i + 1)
Parser.Continue n s -> do
assert (n <= Prelude.length (x:backBuf)) (return ())
let (src0, buf1) = Prelude.splitAt n (x:backBuf)
arr0 = fromListN n (Prelude.reverse src0)
arr1 = Array contents next end
src = arr0 <> arr1
goArray s buf1 st src
goArray s buf1 st src (i + 1 - n)
Parser.Done 0 b -> do
let arr = Array contents next end
return (Right b, StreamK.cons arr st)
Expand All @@ -967,34 +967,34 @@ parseBreakChunksK (Parser pstep initial extract) stream = do
arr0 = fromListN n (Prelude.reverse backBuf)
arr1 = Array contents cur end
str = StreamK.cons arr0 (StreamK.cons arr1 st)
return (Left (ParseError err), str)
return (Left (ParseError (i + 1) err), str)

-- This is a simplified goArray
goExtract !pst backBuf (Array _ cur end)
| cur == end = goStop pst backBuf
goExtract !pst backBuf (Array contents cur end) = do
goExtract !pst backBuf (Array _ cur end) i
| cur == end = goStop pst backBuf i
goExtract !pst backBuf (Array contents cur end) i = do
x <- liftIO $ peekAt cur contents
pRes <- pstep pst x
let next = INDEX_NEXT(cur,a)
case pRes of
Parser.Partial 0 s ->
goExtract s [] (Array contents next end)
goExtract s [] (Array contents next end) (i + 1)
Parser.Partial n s -> do
assert (n <= Prelude.length (x:backBuf)) (return ())
let src0 = Prelude.take n (x:backBuf)
arr0 = fromListN n (Prelude.reverse src0)
arr1 = Array contents next end
src = arr0 <> arr1
goExtract s [] src
goExtract s [] src (i + 1 - n)
Parser.Continue 0 s ->
goExtract s backBuf (Array contents next end)
goExtract s backBuf (Array contents next end) (i + 1)
Parser.Continue n s -> do
assert (n <= Prelude.length (x:backBuf)) (return ())
let (src0, buf1) = Prelude.splitAt n (x:backBuf)
arr0 = fromListN n (Prelude.reverse src0)
arr1 = Array contents next end
src = arr0 <> arr1
goExtract s buf1 src
goExtract s buf1 src (i + 1 - n)
Parser.Done 0 b -> do
let arr = Array contents next end
return (Right b, StreamK.fromPure arr)
Expand All @@ -1012,21 +1012,21 @@ parseBreakChunksK (Parser pstep initial extract) stream = do
arr0 = fromListN n (Prelude.reverse backBuf)
arr1 = Array contents cur end
str = StreamK.cons arr0 (StreamK.fromPure arr1)
return (Left (ParseError err), str)
return (Left (ParseError (i + 1) err), str)

-- This is a simplified goExtract
{-# INLINE goStop #-}
goStop !pst backBuf = do
goStop !pst backBuf i = do
pRes <- extract pst
case pRes of
Parser.Partial _ _ -> error "Bug: parseBreak: Partial in extract"
Parser.Continue 0 s ->
goStop s backBuf
goStop s backBuf i
Parser.Continue n s -> do
assert (n <= Prelude.length backBuf) (return ())
let (src0, buf1) = Prelude.splitAt n backBuf
arr = fromListN n (Prelude.reverse src0)
goExtract s buf1 arr
goExtract s buf1 arr (i - n)
Parser.Done 0 b ->
return (Right b, StreamK.nil)
Parser.Done n b -> do
Expand All @@ -1039,4 +1039,4 @@ parseBreakChunksK (Parser pstep initial extract) stream = do
Parser.Error err -> do
let n = Prelude.length backBuf
arr0 = fromListN n (Prelude.reverse backBuf)
return (Left (ParseError err), StreamK.fromPure arr0)
return (Left (ParseError i err), StreamK.fromPure arr0)
22 changes: 11 additions & 11 deletions core/src/Streamly/Internal/Data/Array/Stream.hs
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ runArrayParserDBreak
case res of
PRD.IPartial s -> go SPEC state (List []) s
PRD.IDone b -> return (Right b, stream)
PRD.IError err -> return (Left (ParseError err), stream)
PRD.IError err -> return (Left (ParseError (-1) err), stream)

where

Expand Down Expand Up @@ -374,7 +374,7 @@ runArrayParserDBreak
let src0 = x:getList backBuf
src = Prelude.reverse src0 ++ x:xs
strm = D.append (D.fromList src) (D.Stream step s)
return (Left (ParseError err), strm)
return (Left (ParseError (-1) err), strm)

-- This is a simplified gobuf
goExtract _ [] backBuf !pst = goStop backBuf pst
Expand Down Expand Up @@ -411,7 +411,7 @@ runArrayParserDBreak
PR.Error err -> do
let src0 = getList backBuf
src = Prelude.reverse src0 ++ x:xs
return (Left (ParseError err), D.fromList src)
return (Left (ParseError (-1) err), D.fromList src)

-- This is a simplified goExtract
{-# INLINE goStop #-}
Expand Down Expand Up @@ -439,7 +439,7 @@ runArrayParserDBreak
PR.Error err -> do
let src0 = getList backBuf
src = Prelude.reverse src0
return (Left (ParseError err), D.fromList src)
return (Left (ParseError (-1) err), D.fromList src)

{-
-- | Parse an array stream using the supplied 'Parser'. Returns the parse
Expand Down Expand Up @@ -517,7 +517,7 @@ runArrayFoldManyD
let next = ParseChunksInitLeftOver []
return
$ D.Skip
$ ParseChunksYield (Left (ParseError err)) next
$ ParseChunksYield (Left (ParseError (-1) err)) next
D.Skip s -> return $ D.Skip $ ParseChunksInit [] s
D.Stop -> return D.Stop

Expand All @@ -534,7 +534,7 @@ runArrayFoldManyD
let next = ParseChunksInitLeftOver []
return
$ D.Skip
$ ParseChunksYield (Left (ParseError err)) next
$ ParseChunksYield (Left (ParseError (-1) err)) next

-- This is a simplified ParseChunksInit
stepOuter _ (ParseChunksInitBuf src) = do
Expand All @@ -549,7 +549,7 @@ runArrayFoldManyD
let next = ParseChunksInitLeftOver []
return
$ D.Skip
$ ParseChunksYield (Left (ParseError err)) next
$ ParseChunksYield (Left (ParseError (-1) err)) next

-- XXX we just discard any leftover input at the end
stepOuter _ (ParseChunksInitLeftOver _) = return D.Stop
Expand Down Expand Up @@ -596,7 +596,7 @@ runArrayFoldManyD
let next = ParseChunksInitLeftOver []
return
$ D.Skip
$ ParseChunksYield (Left (ParseError err)) next
$ ParseChunksYield (Left (ParseError (-1) err)) next

D.Skip s -> return $ D.Skip $ ParseChunksStream s backBuf pst
D.Stop -> return $ D.Skip $ ParseChunksStop backBuf pst
Expand Down Expand Up @@ -638,7 +638,7 @@ runArrayFoldManyD
let next = ParseChunksInitLeftOver []
return
$ D.Skip
$ ParseChunksYield (Left (ParseError err)) next
$ ParseChunksYield (Left (ParseError (-1) err)) next

-- This is a simplified ParseChunksBuf
stepOuter _ (ParseChunksExtract [] buf pst) =
Expand Down Expand Up @@ -676,7 +676,7 @@ runArrayFoldManyD
let next = ParseChunksInitLeftOver []
return
$ D.Skip
$ ParseChunksYield (Left (ParseError err)) next
$ ParseChunksYield (Left (ParseError (-1) err)) next


-- This is a simplified ParseChunksExtract
Expand Down Expand Up @@ -706,7 +706,7 @@ runArrayFoldManyD
let next = ParseChunksInitLeftOver []
return
$ D.Skip
$ ParseChunksYield (Left (ParseError err)) next
$ ParseChunksYield (Left (ParseError (-1) err)) next

stepOuter _ (ParseChunksYield a next) = return $ D.Yield a next

Expand Down
20 changes: 10 additions & 10 deletions core/src/Streamly/Internal/Data/Parser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,7 @@ data Tuple'Fused a b = Tuple'Fused !a !b deriving Show
-- Right [1,2]
--
-- >>> takeBetween' 2 4 [1]
-- Left (ParseError "takeBetween: Expecting alteast 2 elements, got 1")
-- Left (ParseError 1 "takeBetween: Expecting alteast 2 elements, got 1")
--
-- >>> takeBetween' 0 0 [1, 2]
-- Right []
Expand Down Expand Up @@ -721,7 +721,7 @@ takeBetween low high (Fold fstep finitial _ ffinal) =
-- Right [1,0]
--
-- >>> Stream.parse (Parser.takeEQ 4 Fold.toList) $ Stream.fromList [1,0,1]
-- Left (ParseError "takeEQ: Expecting exactly 4 elements, input terminated on 3")
-- Left (ParseError 3 "takeEQ: Expecting exactly 4 elements, input terminated on 3")
--
{-# INLINE takeEQ #-}
takeEQ :: Monad m => Int -> Fold m a b -> Parser a m b
Expand Down Expand Up @@ -782,7 +782,7 @@ data TakeGEState s =
-- elements.
--
-- >>> Stream.parse (Parser.takeGE 4 Fold.toList) $ Stream.fromList [1,0,1]
-- Left (ParseError "takeGE: Expecting at least 4 elements, input terminated on 3")
-- Left (ParseError 3 "takeGE: Expecting at least 4 elements, input terminated on 3")
--
-- >>> Stream.parse (Parser.takeGE 4 Fold.toList) $ Stream.fromList [1,0,1,0,1]
-- Right [1,0,1,0,1]
Expand Down Expand Up @@ -1294,7 +1294,7 @@ takeEitherSepBy _cond = undefined -- D.toParserK . D.takeEitherSepBy cond
-- >>> p = Parser.takeBeginBy (== ',') Fold.toList
-- >>> leadingComma = Stream.parse p . Stream.fromList
-- >>> leadingComma "a,b"
-- Left (ParseError "takeBeginBy: missing frame start")
-- Left (ParseError 1 "takeBeginBy: missing frame start")
-- ...
-- >>> leadingComma ",,"
-- Right ","
Expand Down Expand Up @@ -1372,7 +1372,7 @@ RENAME(takeStartBy_,takeBeginBy_)
-- >>> Stream.parse p $ Stream.fromList "{hello \\{world}"
-- Right "hello {world"
-- >>> Stream.parse p $ Stream.fromList "{hello {world}"
-- Left (ParseError "takeFramedByEsc_: missing frame end")
-- Left (ParseError 1 "takeFramedByEsc_: missing frame end")
--
-- /Pre-release/
{-# INLINE takeFramedByEsc_ #-}
Expand Down Expand Up @@ -2115,7 +2115,7 @@ groupByRollingEither
-- Right "string"
--
-- >>> Stream.parse (Parser.listEqBy (==) "mismatch") $ Stream.fromList "match"
-- Left (ParseError "streamEqBy: mismtach occurred")
-- Left (ParseError 2 "streamEqBy: mismtach occurred")
--
{-# INLINE listEqBy #-}
listEqBy :: Monad m => (a -> a -> Bool) -> [a] -> Parser a m [a]
Expand Down Expand Up @@ -2406,7 +2406,7 @@ spanByRolling eq f1 f2 =
-- Right [1,2]
--
-- >>> Stream.parse (Parser.takeP 4 (Parser.takeEQ 5 Fold.toList)) $ Stream.fromList [1, 2, 3, 4, 5]
-- Left (ParseError "takeEQ: Expecting exactly 5 elements, input terminated on 4")
-- Left (ParseError 4 "takeEQ: Expecting exactly 5 elements, input terminated on 4")
--
-- /Internal/
{-# INLINE takeP #-}
Expand Down Expand Up @@ -2563,7 +2563,7 @@ data DeintercalateAllState fs sp ss =
-- >>> Stream.parse p $ Stream.fromList "1"
-- Right [Left "1"]
-- >>> Stream.parse p $ Stream.fromList "1+"
-- Left (ParseError "takeWhile1: end of input")
-- Left (ParseError 1 "takeWhile1: end of input")
-- >>> Stream.parse p $ Stream.fromList "1+2+3"
-- Right [Left "1",Right '+',Left "2",Right '+',Left "3"]
--
Expand Down Expand Up @@ -2839,7 +2839,7 @@ data Deintercalate1State b fs sp ss =
-- >>> p2 = Parser.satisfy (== '+')
-- >>> p = Parser.deintercalate1 p1 p2 Fold.toList
-- >>> Stream.parse p $ Stream.fromList ""
-- Left (ParseError "takeWhile1: end of input")
-- Left (ParseError 0 "takeWhile1: end of input")
-- >>> Stream.parse p $ Stream.fromList "1"
-- Right [Left "1"]
-- >>> Stream.parse p $ Stream.fromList "1+"
Expand Down Expand Up @@ -3140,7 +3140,7 @@ sepBy1 p sep sink = do
-- >>> p2 = Parser.satisfy (== '+')
-- >>> p = Parser.sepBy1 p1 p2 Fold.toList
-- >>> Stream.parse p $ Stream.fromList ""
-- Left (ParseError "takeWhile1: end of input")
-- Left (ParseError 0 "takeWhile1: end of input")
-- >>> Stream.parse p $ Stream.fromList "1"
-- Right ["1"]
-- >>> Stream.parse p $ Stream.fromList "1+"
Expand Down
5 changes: 3 additions & 2 deletions core/src/Streamly/Internal/Data/Parser/Type.hs
Original file line number Diff line number Diff line change
Expand Up @@ -455,11 +455,12 @@ data Fold m a b =
--
-- /Pre-release/
--
newtype ParseError = ParseError String
data ParseError = ParseError Int String
deriving (Eq, Show)

instance Exception ParseError where
displayException (ParseError err) = err
-- XXX Append the index in the error message here?
displayException (ParseError _ err) = err

-- | Map a function on the result i.e. on @b@ in @Parser a m b@.
instance Functor m => Functor (Parser a m) where
Expand Down
Loading

0 comments on commit 0719551

Please sign in to comment.