@@ -20,6 +20,7 @@ module Json
2020 , ToValue (.. )
2121 -- * Functions
2222 , decode
23+ , decodeNewlineDelimited
2324 , encode
2425 -- * Infix Synonyms
2526 , pattern (:->)
@@ -78,9 +79,13 @@ import GHC.Exts (Char(C#),Int(I#),gtWord#,ltWord#,word2Int#,chr#)
7879import GHC.Word (Word8 ,Word16 ,Word32 ,Word64 )
7980import GHC.Int (Int8 ,Int16 ,Int32 ,Int64 )
8081import Data.Text (Text )
82+ import Data.Foldable (foldlM )
83+ import Control.Monad.Trans.Except (runExceptT ,except )
84+ import Control.Monad.Trans.Class (lift )
8185
8286import qualified Prelude
8387import qualified Data.Builder.ST as B
88+ import qualified Data.Bytes as Bytes
8489import qualified Data.Bytes.Builder as BLDR
8590import qualified Data.Bytes.Parser as P
8691import qualified Data.Chunks as Chunks
@@ -142,6 +147,7 @@ data SyntaxException
142147 | InvalidNumber
143148 | LeadingZero
144149 | UnexpectedLeftovers
150+ | PossibleOverflow
145151 deriving stock (Eq ,Show )
146152 deriving anyclass (Exception )
147153
@@ -172,13 +178,57 @@ isSpace w =
172178
173179-- | Decode a JSON syntax tree from a byte sequence.
174180decode :: Bytes -> Either SyntaxException Value
175- decode = P. parseBytesEither do
181+ {-# noinline decode #-}
182+ decode = P. parseBytesEither parser
183+
184+ parser :: Parser SyntaxException s Value
185+ {-# inline parser #-}
186+ parser = do
176187 P. skipWhile isSpace
177- result <- Latin. any EmptyInput >>= parser
188+ result <- Latin. any EmptyInput >>= parserStep
178189 P. skipWhile isSpace
179190 P. endOfInput UnexpectedLeftovers
180191 pure result
181192
193+ -- | Decode newline-delimited JSON. Both the LF and the CRLF conventions
194+ -- are supported. The newline character (or character sequence) following
195+ -- the final object may be omitted. This also allows blanks lines consisting
196+ -- of only whitespace.
197+ --
198+ -- It's not strictly necessary for this to be a part of this library, but
199+ -- newline-delimited JSON is somewhat common in practice. It's nice to have
200+ -- this here instead of having to reimplement it in a bunch of different
201+ -- applications.
202+ --
203+ -- Note: To protect against malicious input, this reject byte sequences with
204+ -- more than 10 million newlines. If this is causing a problem for you, open
205+ -- an issue.
206+ --
207+ -- Other note: in the future, this function might be changed transparently
208+ -- to parallelize the decoding of large input (at least 1000 lines) with
209+ -- GHC sparks.
210+ decodeNewlineDelimited :: Bytes -> Either SyntaxException (SmallArray Value )
211+ {-# noinline decodeNewlineDelimited #-}
212+ decodeNewlineDelimited ! everything =
213+ let maxVals = Bytes. count 0x0A everything + 1
214+ in if maxVals > 10000000
215+ then Left PossibleOverflow
216+ else runST $ runExceptT $ do
217+ ! dst <- PM. newSmallArray maxVals Null
218+ ! total <- foldlM
219+ (\ ! ix b ->
220+ let clean = Bytes. dropWhile isSpace (Bytes. dropWhileEnd isSpace b)
221+ in if Bytes. null clean
222+ then pure ix
223+ else do
224+ v <- except (decode clean)
225+ lift (PM. writeSmallArray dst ix v)
226+ pure (ix + 1 )
227+ ) 0 (Bytes. split 0x0A everything)
228+ lift $ PM. shrinkSmallMutableArray dst total
229+ dst' <- lift $ PM. unsafeFreezeSmallArray dst
230+ pure dst'
231+
182232-- | Encode a JSON syntax tree.
183233encode :: Value -> BLDR. Builder
184234{-# noinline encode #-}
@@ -229,9 +279,10 @@ foldrTail f z !ary = go 1 where
229279 = f x (go (i+ 1 ))
230280
231281-- Precondition: skip over all space before calling this.
232- -- It will not skip leading space for you. It does
233- parser :: Char -> Parser SyntaxException s Value
234- parser = \ case
282+ -- It will not skip leading space for you. It does not skip
283+ -- over trailing space either.
284+ parserStep :: Char -> Parser SyntaxException s Value
285+ parserStep = \ case
235286 ' {' -> objectTrailedByBrace
236287 ' [' -> arrayTrailedByBracket
237288 ' t' -> do
@@ -266,7 +317,7 @@ objectTrailedByBrace = do
266317 P. skipWhile isSpace
267318 Latin. char ExpectedColon ' :'
268319 P. skipWhile isSpace
269- val <- Latin. any IncompleteObject >>= parser
320+ val <- Latin. any IncompleteObject >>= parserStep
270321 let ! mbr = Member theKey val
271322 ! b0 <- P. effect B. new
272323 b1 <- P. effect (B. push mbr b0)
@@ -285,7 +336,7 @@ objectStep !b = do
285336 P. skipWhile isSpace
286337 Latin. char ExpectedColon ' :'
287338 P. skipWhile isSpace
288- val <- Latin. any IncompleteObject >>= parser
339+ val <- Latin. any IncompleteObject >>= parserStep
289340 let ! mbr = Member theKey val
290341 P. effect (B. push mbr b) >>= objectStep
291342 ' }' -> do
@@ -310,7 +361,7 @@ arrayTrailedByBracket = do
310361 ' ]' -> pure emptyArray
311362 c -> do
312363 ! b0 <- P. effect B. new
313- val <- parser c
364+ val <- parserStep c
314365 b1 <- P. effect (B. push val b0)
315366 arrayStep b1
316367
@@ -328,7 +379,7 @@ arrayStep !b = do
328379 Latin. any IncompleteArray >>= \ case
329380 ' ,' -> do
330381 P. skipWhile isSpace
331- val <- Latin. any IncompleteArray >>= parser
382+ val <- Latin. any IncompleteArray >>= parserStep
332383 P. effect (B. push val b) >>= arrayStep
333384 ' ]' -> do
334385 ! r <- P. effect (B. freeze b)
0 commit comments