1 {-# LANGUAGE CPP, BangPatterns #-}
    2 {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
    3 {-# OPTIONS_HADDOCK prune #-}
    4 #if __GLASGOW_HASKELL__ >= 701
    5 {-# LANGUAGE Trustworthy #-}
    6 #endif
    7 
    8 -- |
    9 -- Module      : Data.ByteString.Lazy
   10 -- Copyright   : (c) Don Stewart 2006
   11 --               (c) Duncan Coutts 2006-2011
   12 -- License     : BSD-style
   13 --
   14 -- Maintainer  : dons00@gmail.com, duncan@community.haskell.org
   15 -- Stability   : stable
   16 -- Portability : portable
   17 --
   18 -- A time and space-efficient implementation of lazy byte vectors
   19 -- using lists of packed 'Word8' arrays, suitable for high performance
   20 -- use, both in terms of large data quantities, or high speed
   21 -- requirements. Lazy ByteStrings are encoded as lazy lists of strict chunks
   22 -- of bytes.
   23 --
   24 -- A key feature of lazy ByteStrings is the means to manipulate large or
   25 -- unbounded streams of data without requiring the entire sequence to be
   26 -- resident in memory. To take advantage of this you have to write your
   27 -- functions in a lazy streaming style, e.g. classic pipeline composition. The
   28 -- default I\/O chunk size is 32k, which should be good in most circumstances.
   29 --
   30 -- Some operations, such as 'concat', 'append', 'reverse' and 'cons', have
   31 -- better complexity than their "Data.ByteString" equivalents, due to
   32 -- optimisations resulting from the list spine structure. For other
   33 -- operations lazy ByteStrings are usually within a few percent of
   34 -- strict ones.
   35 --
   36 -- The recomended way to assemble lazy ByteStrings from smaller parts
   37 -- is to use the builder monoid from "Data.ByteString.Builder".
   38 --
   39 -- This module is intended to be imported @qualified@, to avoid name
   40 -- clashes with "Prelude" functions.  eg.
   41 --
   42 -- > import qualified Data.ByteString.Lazy as B
   43 --
   44 -- Original GHC implementation by Bryan O\'Sullivan.
   45 -- Rewritten to use 'Data.Array.Unboxed.UArray' by Simon Marlow.
   46 -- Rewritten to support slices and use 'Foreign.ForeignPtr.ForeignPtr'
   47 -- by David Roundy.
   48 -- Rewritten again and extended by Don Stewart and Duncan Coutts.
   49 -- Lazy variant by Duncan Coutts and Don Stewart.
   50 --
   51 
   52 module Data.ByteString.Lazy (
   53 
   54         -- * The @ByteString@ type
   55         ByteString,             -- instances: Eq, Ord, Show, Read, Data, Typeable
   56 
   57         -- * Introducing and eliminating 'ByteString's
   58         empty,                  -- :: ByteString
   59         singleton,              -- :: Word8   -> ByteString
   60         pack,                   -- :: [Word8] -> ByteString
   61         unpack,                 -- :: ByteString -> [Word8]
   62         fromStrict,             -- :: Strict.ByteString -> ByteString
   63         toStrict,               -- :: ByteString -> Strict.ByteString
   64         fromChunks,             -- :: [Strict.ByteString] -> ByteString
   65         toChunks,               -- :: ByteString -> [Strict.ByteString]
   66         foldrChunks,            -- :: (S.ByteString -> a -> a) -> a -> ByteString -> a
   67         foldlChunks,            -- :: (a -> S.ByteString -> a) -> a -> ByteString -> a
   68 
   69         -- * Basic interface
   70         cons,                   -- :: Word8 -> ByteString -> ByteString
   71         cons',                  -- :: Word8 -> ByteString -> ByteString
   72         snoc,                   -- :: ByteString -> Word8 -> ByteString
   73         append,                 -- :: ByteString -> ByteString -> ByteString
   74         head,                   -- :: ByteString -> Word8
   75         uncons,                 -- :: ByteString -> Maybe (Word8, ByteString)
   76         unsnoc,                 -- :: ByteString -> Maybe (ByteString, Word8)
   77         last,                   -- :: ByteString -> Word8
   78         tail,                   -- :: ByteString -> ByteString
   79         init,                   -- :: ByteString -> ByteString
   80         null,                   -- :: ByteString -> Bool
   81         length,                 -- :: ByteString -> Int64
   82 
   83         -- * Transforming ByteStrings
   84         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
   85         reverse,                -- :: ByteString -> ByteString
   86         intersperse,            -- :: Word8 -> ByteString -> ByteString
   87         intercalate,            -- :: ByteString -> [ByteString] -> ByteString
   88         transpose,              -- :: [ByteString] -> [ByteString]
   89 
   90         -- * Reducing 'ByteString's (folds)
   91         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
   92         foldl',                 -- :: (a -> Word8 -> a) -> a -> ByteString -> a
   93         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
   94         foldl1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
   95         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
   96         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
   97 
   98         -- ** Special folds
   99         concat,                 -- :: [ByteString] -> ByteString
  100         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
  101         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  102         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  103         maximum,                -- :: ByteString -> Word8
  104         minimum,                -- :: ByteString -> Word8
  105 
  106         -- * Building ByteStrings
  107         -- ** Scans
  108         scanl,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  109 --        scanl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  110 --        scanr,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  111 --        scanr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  112 
  113         -- ** Accumulating maps
  114         mapAccumL,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  115         mapAccumR,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  116 
  117         -- ** Infinite ByteStrings
  118         repeat,                 -- :: Word8 -> ByteString
  119         replicate,              -- :: Int64 -> Word8 -> ByteString
  120         cycle,                  -- :: ByteString -> ByteString
  121         iterate,                -- :: (Word8 -> Word8) -> Word8 -> ByteString
  122 
  123         -- ** Unfolding ByteStrings
  124         unfoldr,                -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
  125 
  126         -- * Substrings
  127 
  128         -- ** Breaking strings
  129         take,                   -- :: Int64 -> ByteString -> ByteString
  130         drop,                   -- :: Int64 -> ByteString -> ByteString
  131         splitAt,                -- :: Int64 -> ByteString -> (ByteString, ByteString)
  132         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
  133         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
  134         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  135         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  136         group,                  -- :: ByteString -> [ByteString]
  137         groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
  138         inits,                  -- :: ByteString -> [ByteString]
  139         tails,                  -- :: ByteString -> [ByteString]
  140         stripPrefix,            -- :: ByteString -> ByteString -> Maybe ByteString
  141         stripSuffix,            -- :: ByteString -> ByteString -> Maybe ByteString
  142 
  143         -- ** Breaking into many substrings
  144         split,                  -- :: Word8 -> ByteString -> [ByteString]
  145         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
  146 
  147         -- * Predicates
  148         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
  149         isSuffixOf,             -- :: ByteString -> ByteString -> Bool
  150 --        isInfixOf,              -- :: ByteString -> ByteString -> Bool
  151 
  152         -- ** Search for arbitrary substrings
  153 --        isSubstringOf,          -- :: ByteString -> ByteString -> Bool
  154 --        findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
  155 --        findSubstrings,         -- :: ByteString -> ByteString -> [Int]
  156 
  157         -- * Searching ByteStrings
  158 
  159         -- ** Searching by equality
  160         elem,                   -- :: Word8 -> ByteString -> Bool
  161         notElem,                -- :: Word8 -> ByteString -> Bool
  162 
  163         -- ** Searching with a predicate
  164         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
  165         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
  166         partition,              -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  167 
  168         -- * Indexing ByteStrings
  169         index,                  -- :: ByteString -> Int64 -> Word8
  170         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int64
  171         elemIndexEnd,           -- :: Word8 -> ByteString -> Maybe Int64
  172         elemIndices,            -- :: Word8 -> ByteString -> [Int64]
  173         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int64
  174         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int64]
  175         count,                  -- :: Word8 -> ByteString -> Int64
  176 
  177         -- * Zipping and unzipping ByteStrings
  178         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
  179         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
  180         unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
  181 
  182         -- * Ordered ByteStrings
  183 --        sort,                   -- :: ByteString -> ByteString
  184 
  185         -- * Low level conversions
  186         -- ** Copying ByteStrings
  187         copy,                   -- :: ByteString -> ByteString
  188 --        defrag,                -- :: ByteString -> ByteString
  189 
  190         -- * I\/O with 'ByteString's
  191 
  192         -- ** Standard input and output
  193         getContents,            -- :: IO ByteString
  194         putStr,                 -- :: ByteString -> IO ()
  195         putStrLn,               -- :: ByteString -> IO ()
  196         interact,               -- :: (ByteString -> ByteString) -> IO ()
  197 
  198         -- ** Files
  199         readFile,               -- :: FilePath -> IO ByteString
  200         writeFile,              -- :: FilePath -> ByteString -> IO ()
  201         appendFile,             -- :: FilePath -> ByteString -> IO ()
  202 
  203         -- ** I\/O with Handles
  204         hGetContents,           -- :: Handle -> IO ByteString
  205         hGet,                   -- :: Handle -> Int -> IO ByteString
  206         hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
  207         hPut,                   -- :: Handle -> ByteString -> IO ()
  208         hPutNonBlocking,        -- :: Handle -> ByteString -> IO ByteString
  209         hPutStr,                -- :: Handle -> ByteString -> IO ()
  210 
  211   ) where
  212 
  213 import Prelude hiding
  214     (reverse,head,tail,last,init,null,length,map,lines,foldl,foldr,unlines
  215     ,concat,any,take,drop,splitAt,takeWhile,dropWhile,span,break,elem,filter,maximum
  216     ,minimum,all,concatMap,foldl1,foldr1,scanl, scanl1, scanr, scanr1
  217     ,repeat, cycle, interact, iterate,readFile,writeFile,appendFile,replicate
  218     ,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem)
  219 
  220 import qualified Data.List              as L  -- L for list/lazy
  221 import qualified Data.ByteString        as P  (ByteString) -- type name only
  222 import qualified Data.ByteString        as S  -- S for strict (hmm...)
  223 import qualified Data.ByteString.Internal as S
  224 import qualified Data.ByteString.Unsafe as S
  225 import Data.ByteString.Lazy.Internal
  226 
  227 #if !(MIN_VERSION_base(4,8,0))
  228 import Control.Applicative      ((<$>))
  229 import Data.Monoid              (Monoid(..))
  230 #endif
  231 import Control.Monad            (mplus)
  232 
  233 import Data.Word                (Word8)
  234 import Data.Int                 (Int64)
  235 import System.IO                (Handle,stdin,stdout,openBinaryFile,IOMode(..)
  236                                 ,hClose)
  237 import System.IO.Error          (mkIOError, illegalOperationErrorType)
  238 import System.IO.Unsafe
  239 import Control.Exception        (bracket)
  240 
  241 import Foreign.ForeignPtr       (withForeignPtr)
  242 import Foreign.Ptr
  243 import Foreign.Storable
  244 
  245 
  246 -- -----------------------------------------------------------------------------
  247 -- Introducing and eliminating 'ByteString's
  248 
  249 -- | /O(1)/ The empty 'ByteString'
  250 empty :: ByteString
  251 empty = Empty
  252 {-# INLINE empty #-}
  253 
  254 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
  255 singleton :: Word8 -> ByteString
  256 singleton w = Chunk (S.singleton w) Empty
  257 {-# INLINE singleton #-}
  258 
  259 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
  260 pack :: [Word8] -> ByteString
  261 pack = packBytes
  262 
  263 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
  264 unpack :: ByteString -> [Word8]
  265 unpack = unpackBytes
  266 
  267 -- | /O(c)/ Convert a list of strict 'ByteString' into a lazy 'ByteString'
  268 fromChunks :: [P.ByteString] -> ByteString
  269 fromChunks cs = L.foldr chunk Empty cs
  270 
  271 -- | /O(c)/ Convert a lazy 'ByteString' into a list of strict 'ByteString'
  272 toChunks :: ByteString -> [P.ByteString]
  273 toChunks cs = foldrChunks (:) [] cs
  274 
  275 -- |/O(1)/ Convert a strict 'ByteString' into a lazy 'ByteString'.
  276 fromStrict :: P.ByteString -> ByteString
  277 fromStrict bs | S.null bs = Empty
  278               | otherwise = Chunk bs Empty
  279 
  280 -- |/O(n)/ Convert a lazy 'ByteString' into a strict 'ByteString'.
  281 --
  282 -- Note that this is an /expensive/ operation that forces the whole lazy
  283 -- ByteString into memory and then copies all the data. If possible, try to
  284 -- avoid converting back and forth between strict and lazy bytestrings.
  285 --
  286 toStrict :: ByteString -> S.ByteString
  287 toStrict cs0 = goLen0 cs0
  288   where
  289     -- It's still possible that the result is empty
  290     goLen0 Empty                   = S.empty
  291     goLen0 (Chunk c cs) | S.null c = goLen0 cs
  292     goLen0 (Chunk c cs)            = goLen1 c cs
  293 
  294     -- It's still possible that the result is a single chunk
  295     goLen1 bs Empty                = bs
  296     goLen1 bs (Chunk c cs)
  297       | S.null c                   = goLen1 bs cs
  298       | otherwise                  =
  299         goLen (S.checkedAdd "Lazy.concat" (S.length bs) (S.length c)) cs
  300 
  301     -- General case, just find the total length we'll need
  302     goLen !total (Chunk c cs)      = goLen total' cs
  303       where
  304         total' = S.checkedAdd "Lazy.concat" total (S.length c)
  305     goLen total Empty =
  306       S.unsafeCreate total $ \ptr -> goCopy cs0 ptr
  307 
  308     -- Copy the data
  309     goCopy Empty                        !_   = return ()
  310     goCopy (Chunk (S.PS _  _   0  ) cs) !ptr = goCopy cs ptr
  311     goCopy (Chunk (S.PS fp off len) cs) !ptr = do
  312       withForeignPtr fp $ \p -> do
  313         S.memcpy ptr (p `plusPtr` off) len
  314         goCopy cs (ptr `plusPtr` len)
  315 -- See the comment on Data.ByteString.Internal.concat for some background on
  316 -- this implementation.
  317 
  318 ------------------------------------------------------------------------
  319 
  320 {-
  321 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
  322 -- conversion function
  323 packWith :: (a -> Word8) -> [a] -> ByteString
  324 packWith k str = LPS $ L.map (P.packWith k) (chunk defaultChunkSize str)
  325 {-# INLINE packWith #-}
  326 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
  327 
  328 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
  329 unpackWith :: (Word8 -> a) -> ByteString -> [a]
  330 unpackWith k (LPS ss) = L.concatMap (S.unpackWith k) ss
  331 {-# INLINE unpackWith #-}
  332 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
  333 -}
  334 
  335 -- ---------------------------------------------------------------------
  336 -- Basic interface
  337 
  338 -- | /O(1)/ Test whether a ByteString is empty.
  339 null :: ByteString -> Bool
  340 null Empty = True
  341 null _     = False
  342 {-# INLINE null #-}
  343 
  344 -- | /O(n\/c)/ 'length' returns the length of a ByteString as an 'Int64'
  345 length :: ByteString -> Int64
  346 length cs = foldlChunks (\n c -> n + fromIntegral (S.length c)) 0 cs
  347 {-# INLINE length #-}
  348 
  349 infixr 5 `cons`, `cons'` --same as list (:)
  350 infixl 5 `snoc`
  351 
  352 -- | /O(1)/ 'cons' is analogous to '(:)' for lists.
  353 --
  354 cons :: Word8 -> ByteString -> ByteString
  355 cons c cs = Chunk (S.singleton c) cs
  356 {-# INLINE cons #-}
  357 
  358 -- | /O(1)/ Unlike 'cons', 'cons\'' is
  359 -- strict in the ByteString that we are consing onto. More precisely, it forces
  360 -- the head and the first chunk. It does this because, for space efficiency, it
  361 -- may coalesce the new byte onto the first \'chunk\' rather than starting a
  362 -- new \'chunk\'.
  363 --
  364 -- So that means you can't use a lazy recursive contruction like this:
  365 --
  366 -- > let xs = cons\' c xs in xs
  367 --
  368 -- You can however use 'cons', as well as 'repeat' and 'cycle', to build
  369 -- infinite lazy ByteStrings.
  370 --
  371 cons' :: Word8 -> ByteString -> ByteString
  372 cons' w (Chunk c cs) | S.length c < 16 = Chunk (S.cons w c) cs
  373 cons' w cs                             = Chunk (S.singleton w) cs
  374 {-# INLINE cons' #-}
  375 
  376 -- | /O(n\/c)/ Append a byte to the end of a 'ByteString'
  377 snoc :: ByteString -> Word8 -> ByteString
  378 snoc cs w = foldrChunks Chunk (singleton w) cs
  379 {-# INLINE snoc #-}
  380 
  381 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
  382 head :: ByteString -> Word8
  383 head Empty       = errorEmptyList "head"
  384 head (Chunk c _) = S.unsafeHead c
  385 {-# INLINE head #-}
  386 
  387 -- | /O(1)/ Extract the head and tail of a ByteString, returning Nothing
  388 -- if it is empty.
  389 uncons :: ByteString -> Maybe (Word8, ByteString)
  390 uncons Empty = Nothing
  391 uncons (Chunk c cs)
  392     = Just (S.unsafeHead c,
  393             if S.length c == 1 then cs else Chunk (S.unsafeTail c) cs)
  394 {-# INLINE uncons #-}
  395 
  396 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be
  397 -- non-empty.
  398 tail :: ByteString -> ByteString
  399 tail Empty          = errorEmptyList "tail"
  400 tail (Chunk c cs)
  401   | S.length c == 1 = cs
  402   | otherwise       = Chunk (S.unsafeTail c) cs
  403 {-# INLINE tail #-}
  404 
  405 -- | /O(n\/c)/ Extract the last element of a ByteString, which must be finite
  406 -- and non-empty.
  407 last :: ByteString -> Word8
  408 last Empty          = errorEmptyList "last"
  409 last (Chunk c0 cs0) = go c0 cs0
  410   where go c Empty        = S.unsafeLast c
  411         go _ (Chunk c cs) = go c cs
  412 -- XXX Don't inline this. Something breaks with 6.8.2 (haven't investigated yet)
  413 
  414 -- | /O(n\/c)/ Return all the elements of a 'ByteString' except the last one.
  415 init :: ByteString -> ByteString
  416 init Empty          = errorEmptyList "init"
  417 init (Chunk c0 cs0) = go c0 cs0
  418   where go c Empty | S.length c == 1 = Empty
  419                    | otherwise       = Chunk (S.unsafeInit c) Empty
  420         go c (Chunk c' cs)           = Chunk c (go c' cs)
  421 
  422 -- | /O(n\/c)/ Extract the 'init' and 'last' of a ByteString, returning Nothing
  423 -- if it is empty.
  424 --
  425 -- * It is no faster than using 'init' and 'last'
  426 unsnoc :: ByteString -> Maybe (ByteString, Word8)
  427 unsnoc Empty        = Nothing
  428 unsnoc (Chunk c cs) = Just (init (Chunk c cs), last (Chunk c cs))
  429 
  430 -- | /O(n\/c)/ Append two ByteStrings
  431 append :: ByteString -> ByteString -> ByteString
  432 append = mappend
  433 {-# INLINE append #-}
  434 
  435 -- ---------------------------------------------------------------------
  436 -- Transformations
  437 
  438 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
  439 -- element of @xs@.
  440 map :: (Word8 -> Word8) -> ByteString -> ByteString
  441 map f s = go s
  442     where
  443         go Empty        = Empty
  444         go (Chunk x xs) = Chunk y ys
  445             where
  446                 y  = S.map f x
  447                 ys = go xs
  448 {-# INLINE map #-}
  449 
  450 -- | /O(n)/ 'reverse' @xs@ returns the elements of @xs@ in reverse order.
  451 reverse :: ByteString -> ByteString
  452 reverse cs0 = rev Empty cs0
  453   where rev a Empty        = a
  454         rev a (Chunk c cs) = rev (Chunk (S.reverse c) a) cs
  455 {-# INLINE reverse #-}
  456 
  457 -- | The 'intersperse' function takes a 'Word8' and a 'ByteString' and
  458 -- \`intersperses\' that byte between the elements of the 'ByteString'.
  459 -- It is analogous to the intersperse function on Lists.
  460 intersperse :: Word8 -> ByteString -> ByteString
  461 intersperse _ Empty        = Empty
  462 intersperse w (Chunk c cs) = Chunk (S.intersperse w c)
  463                                    (foldrChunks (Chunk . intersperse') Empty cs)
  464   where intersperse' :: P.ByteString -> P.ByteString
  465         intersperse' (S.PS fp o l) =
  466           S.unsafeCreate (2*l) $ \p' -> withForeignPtr fp $ \p -> do
  467             poke p' w
  468             S.c_intersperse (p' `plusPtr` 1) (p `plusPtr` o) (fromIntegral l) w
  469 
  470 -- | The 'transpose' function transposes the rows and columns of its
  471 -- 'ByteString' argument.
  472 transpose :: [ByteString] -> [ByteString]
  473 transpose css = L.map (\ss -> Chunk (S.pack ss) Empty)
  474                       (L.transpose (L.map unpack css))
  475 --TODO: make this fast
  476 
  477 -- ---------------------------------------------------------------------
  478 -- Reducing 'ByteString's
  479 
  480 -- | 'foldl', applied to a binary operator, a starting value (typically
  481 -- the left-identity of the operator), and a ByteString, reduces the
  482 -- ByteString using the binary operator, from left to right.
  483 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
  484 foldl f z = go z
  485   where go a Empty        = a
  486         go a (Chunk c cs) = go (S.foldl f a c) cs
  487 {-# INLINE foldl #-}
  488 
  489 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
  490 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
  491 foldl' f z = go z
  492   where go !a Empty        = a
  493         go !a (Chunk c cs) = go (S.foldl' f a c) cs
  494 {-# INLINE foldl' #-}
  495 
  496 -- | 'foldr', applied to a binary operator, a starting value
  497 -- (typically the right-identity of the operator), and a ByteString,
  498 -- reduces the ByteString using the binary operator, from right to left.
  499 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
  500 foldr k z cs = foldrChunks (flip (S.foldr k)) z cs
  501 {-# INLINE foldr #-}
  502 
  503 -- | 'foldl1' is a variant of 'foldl' that has no starting value
  504 -- argument, and thus must be applied to non-empty 'ByteStrings'.
  505 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  506 foldl1 _ Empty        = errorEmptyList "foldl1"
  507 foldl1 f (Chunk c cs) = foldl f (S.unsafeHead c) (Chunk (S.unsafeTail c) cs)
  508 
  509 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
  510 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  511 foldl1' _ Empty        = errorEmptyList "foldl1'"
  512 foldl1' f (Chunk c cs) = foldl' f (S.unsafeHead c) (Chunk (S.unsafeTail c) cs)
  513 
  514 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
  515 -- and thus must be applied to non-empty 'ByteString's
  516 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  517 foldr1 _ Empty          = errorEmptyList "foldr1"
  518 foldr1 f (Chunk c0 cs0) = go c0 cs0
  519   where go c Empty         = S.foldr1 f c
  520         go c (Chunk c' cs) = S.foldr  f (go c' cs) c
  521 
  522 -- ---------------------------------------------------------------------
  523 -- Special folds
  524 
  525 -- | /O(n)/ Concatenate a list of ByteStrings.
  526 concat :: [ByteString] -> ByteString
  527 concat = mconcat
  528 
  529 -- | Map a function over a 'ByteString' and concatenate the results
  530 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
  531 concatMap _ Empty        = Empty
  532 concatMap f (Chunk c0 cs0) = to c0 cs0
  533   where
  534     go :: ByteString -> P.ByteString -> ByteString -> ByteString
  535     go Empty        c' cs' = to c' cs'
  536     go (Chunk c cs) c' cs' = Chunk c (go cs c' cs')
  537 
  538     to :: P.ByteString -> ByteString -> ByteString
  539     to c cs | S.null c  = case cs of
  540         Empty          -> Empty
  541         (Chunk c' cs') -> to c' cs'
  542             | otherwise = go (f (S.unsafeHead c)) (S.unsafeTail c) cs
  543 
  544 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
  545 -- any element of the 'ByteString' satisfies the predicate.
  546 any :: (Word8 -> Bool) -> ByteString -> Bool
  547 any f cs = foldrChunks (\c rest -> S.any f c || rest) False cs
  548 {-# INLINE any #-}
  549 -- todo fuse
  550 
  551 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
  552 -- if all elements of the 'ByteString' satisfy the predicate.
  553 all :: (Word8 -> Bool) -> ByteString -> Bool
  554 all f cs = foldrChunks (\c rest -> S.all f c && rest) True cs
  555 {-# INLINE all #-}
  556 -- todo fuse
  557 
  558 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
  559 maximum :: ByteString -> Word8
  560 maximum Empty        = errorEmptyList "maximum"
  561 maximum (Chunk c cs) = foldlChunks (\n c' -> n `max` S.maximum c')
  562                                    (S.maximum c) cs
  563 {-# INLINE maximum #-}
  564 
  565 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
  566 minimum :: ByteString -> Word8
  567 minimum Empty        = errorEmptyList "minimum"
  568 minimum (Chunk c cs) = foldlChunks (\n c' -> n `min` S.minimum c')
  569                                      (S.minimum c) cs
  570 {-# INLINE minimum #-}
  571 
  572 -- | The 'mapAccumL' function behaves like a combination of 'map' and
  573 -- 'foldl'; it applies a function to each element of a ByteString,
  574 -- passing an accumulating parameter from left to right, and returning a
  575 -- final value of this accumulator together with the new ByteString.
  576 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  577 mapAccumL f s0 cs0 = go s0 cs0
  578   where
  579     go s Empty        = (s, Empty)
  580     go s (Chunk c cs) = (s'', Chunk c' cs')
  581         where (s',  c')  = S.mapAccumL f s c
  582               (s'', cs') = go s' cs
  583 
  584 -- | The 'mapAccumR' function behaves like a combination of 'map' and
  585 -- 'foldr'; it applies a function to each element of a ByteString,
  586 -- passing an accumulating parameter from right to left, and returning a
  587 -- final value of this accumulator together with the new ByteString.
  588 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  589 mapAccumR f s0 cs0 = go s0 cs0
  590   where
  591     go s Empty        = (s, Empty)
  592     go s (Chunk c cs) = (s'', Chunk c' cs')
  593         where (s'', c') = S.mapAccumR f s' c
  594               (s', cs') = go s cs
  595 
  596 -- ---------------------------------------------------------------------
  597 -- Building ByteStrings
  598 
  599 -- | 'scanl' is similar to 'foldl', but returns a list of successive
  600 -- reduced values from the left. This function will fuse.
  601 --
  602 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
  603 --
  604 -- Note that
  605 --
  606 -- > last (scanl f z xs) == foldl f z xs.
  607 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  608 scanl f z = snd . foldl k (z,singleton z)
  609  where
  610     k (c,acc) a = let n = f c a in (n, acc `snoc` n)
  611 {-# INLINE scanl #-}
  612 
  613 -- ---------------------------------------------------------------------
  614 -- Unfolds and replicates
  615 
  616 -- | @'iterate' f x@ returns an infinite ByteString of repeated applications
  617 -- of @f@ to @x@:
  618 --
  619 -- > iterate f x == [x, f x, f (f x), ...]
  620 --
  621 iterate :: (Word8 -> Word8) -> Word8 -> ByteString
  622 iterate f = unfoldr (\x -> case f x of !x' -> Just (x', x'))
  623 
  624 -- | @'repeat' x@ is an infinite ByteString, with @x@ the value of every
  625 -- element.
  626 --
  627 repeat :: Word8 -> ByteString
  628 repeat w = cs where cs = Chunk (S.replicate smallChunkSize w) cs
  629 
  630 -- | /O(n)/ @'replicate' n x@ is a ByteString of length @n@ with @x@
  631 -- the value of every element.
  632 --
  633 replicate :: Int64 -> Word8 -> ByteString
  634 replicate n w
  635     | n <= 0             = Empty
  636     | n < fromIntegral smallChunkSize = Chunk (S.replicate (fromIntegral n) w) Empty
  637     | r == 0             = cs -- preserve invariant
  638     | otherwise          = Chunk (S.unsafeTake (fromIntegral r) c) cs
  639  where
  640     c      = S.replicate smallChunkSize w
  641     cs     = nChunks q
  642     (q, r) = quotRem n (fromIntegral smallChunkSize)
  643     nChunks 0 = Empty
  644     nChunks m = Chunk c (nChunks (m-1))
  645 
  646 -- | 'cycle' ties a finite ByteString into a circular one, or equivalently,
  647 -- the infinite repetition of the original ByteString.
  648 --
  649 cycle :: ByteString -> ByteString
  650 cycle Empty = errorEmptyList "cycle"
  651 cycle cs    = cs' where cs' = foldrChunks Chunk cs' cs
  652 
  653 -- | /O(n)/ The 'unfoldr' function is analogous to the List \'unfoldr\'.
  654 -- 'unfoldr' builds a ByteString from a seed value.  The function takes
  655 -- the element and returns 'Nothing' if it is done producing the
  656 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
  657 -- prepending to the ByteString and @b@ is used as the next element in a
  658 -- recursive call.
  659 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
  660 unfoldr f s0 = unfoldChunk 32 s0
  661   where unfoldChunk n s =
  662           case S.unfoldrN n f s of
  663             (c, Nothing)
  664               | S.null c  -> Empty
  665               | otherwise -> Chunk c Empty
  666             (c, Just s')  -> Chunk c (unfoldChunk (n*2) s')
  667 
  668 -- ---------------------------------------------------------------------
  669 -- Substrings
  670 
  671 -- | /O(n\/c)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
  672 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
  673 take :: Int64 -> ByteString -> ByteString
  674 take i _ | i <= 0 = Empty
  675 take i cs0         = take' i cs0
  676   where take' 0 _            = Empty
  677         take' _ Empty        = Empty
  678         take' n (Chunk c cs) =
  679           if n < fromIntegral (S.length c)
  680             then Chunk (S.take (fromIntegral n) c) Empty
  681             else Chunk c (take' (n - fromIntegral (S.length c)) cs)
  682 
  683 -- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
  684 -- elements, or @[]@ if @n > 'length' xs@.
  685 drop  :: Int64 -> ByteString -> ByteString
  686 drop i p | i <= 0 = p
  687 drop i cs0 = drop' i cs0
  688   where drop' 0 cs           = cs
  689         drop' _ Empty        = Empty
  690         drop' n (Chunk c cs) =
  691           if n < fromIntegral (S.length c)
  692             then Chunk (S.drop (fromIntegral n) c) cs
  693             else drop' (n - fromIntegral (S.length c)) cs
  694 
  695 -- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
  696 splitAt :: Int64 -> ByteString -> (ByteString, ByteString)
  697 splitAt i cs0 | i <= 0 = (Empty, cs0)
  698 splitAt i cs0 = splitAt' i cs0
  699   where splitAt' 0 cs           = (Empty, cs)
  700         splitAt' _ Empty        = (Empty, Empty)
  701         splitAt' n (Chunk c cs) =
  702           if n < fromIntegral (S.length c)
  703             then (Chunk (S.take (fromIntegral n) c) Empty
  704                  ,Chunk (S.drop (fromIntegral n) c) cs)
  705             else let (cs', cs'') = splitAt' (n - fromIntegral (S.length c)) cs
  706                    in (Chunk c cs', cs'')
  707 
  708 
  709 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
  710 -- returns the longest prefix (possibly empty) of @xs@ of elements that
  711 -- satisfy @p@.
  712 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
  713 takeWhile f cs0 = takeWhile' cs0
  714   where takeWhile' Empty        = Empty
  715         takeWhile' (Chunk c cs) =
  716           case findIndexOrEnd (not . f) c of
  717             0                  -> Empty
  718             n | n < S.length c -> Chunk (S.take n c) Empty
  719               | otherwise      -> Chunk c (takeWhile' cs)
  720 
  721 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
  722 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
  723 dropWhile f cs0 = dropWhile' cs0
  724   where dropWhile' Empty        = Empty
  725         dropWhile' (Chunk c cs) =
  726           case findIndexOrEnd (not . f) c of
  727             n | n < S.length c -> Chunk (S.drop n c) cs
  728               | otherwise      -> dropWhile' cs
  729 
  730 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
  731 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  732 break f cs0 = break' cs0
  733   where break' Empty        = (Empty, Empty)
  734         break' (Chunk c cs) =
  735           case findIndexOrEnd f c of
  736             0                  -> (Empty, Chunk c cs)
  737             n | n < S.length c -> (Chunk (S.take n c) Empty
  738                                   ,Chunk (S.drop n c) cs)
  739               | otherwise      -> let (cs', cs'') = break' cs
  740                                    in (Chunk c cs', cs'')
  741 
  742 --
  743 -- TODO
  744 --
  745 -- Add rules
  746 --
  747 
  748 {-
  749 -- | 'breakByte' breaks its ByteString argument at the first occurence
  750 -- of the specified byte. It is more efficient than 'break' as it is
  751 -- implemented with @memchr(3)@. I.e.
  752 --
  753 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
  754 --
  755 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
  756 breakByte c (LPS ps) = case (breakByte' ps) of (a,b) -> (LPS a, LPS b)
  757   where breakByte' []     = ([], [])
  758         breakByte' (x:xs) =
  759           case P.elemIndex c x of
  760             Just 0  -> ([], x : xs)
  761             Just n  -> (P.take n x : [], P.drop n x : xs)
  762             Nothing -> let (xs', xs'') = breakByte' xs
  763                         in (x : xs', xs'')
  764 
  765 -- | 'spanByte' breaks its ByteString argument at the first
  766 -- occurence of a byte other than its argument. It is more efficient
  767 -- than 'span (==)'
  768 --
  769 -- > span  (=='c') "abcd" == spanByte 'c' "abcd"
  770 --
  771 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
  772 spanByte c (LPS ps) = case (spanByte' ps) of (a,b) -> (LPS a, LPS b)
  773   where spanByte' []     = ([], [])
  774         spanByte' (x:xs) =
  775           case P.spanByte c x of
  776             (x', x'') | P.null x'  -> ([], x : xs)
  777                       | P.null x'' -> let (xs', xs'') = spanByte' xs
  778                                        in (x : xs', xs'')
  779                       | otherwise  -> (x' : [], x'' : xs)
  780 -}
  781 
  782 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
  783 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
  784 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  785 span p = break (not . p)
  786 
  787 -- | /O(n)/ Splits a 'ByteString' into components delimited by
  788 -- separators, where the predicate returns True for a separator element.
  789 -- The resulting components do not contain the separators.  Two adjacent
  790 -- separators result in an empty component in the output.  eg.
  791 --
  792 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
  793 -- > splitWith (=='a') []        == []
  794 --
  795 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
  796 splitWith _ Empty          = []
  797 splitWith p (Chunk c0 cs0) = comb [] (S.splitWith p c0) cs0
  798 
  799   where comb :: [P.ByteString] -> [P.ByteString] -> ByteString -> [ByteString]
  800         comb acc (s:[]) Empty        = revChunks (s:acc) : []
  801         comb acc (s:[]) (Chunk c cs) = comb (s:acc) (S.splitWith p c) cs
  802         comb acc (s:ss) cs           = revChunks (s:acc) : comb [] ss cs
  803 
  804 {-# INLINE splitWith #-}
  805 
  806 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
  807 -- argument, consuming the delimiter. I.e.
  808 --
  809 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
  810 -- > split 'a'  "aXaXaXa"    == ["","X","X","X",""]
  811 -- > split 'x'  "x"          == ["",""]
  812 --
  813 -- and
  814 --
  815 -- > intercalate [c] . split c == id
  816 -- > split == splitWith . (==)
  817 --
  818 -- As for all splitting functions in this library, this function does
  819 -- not copy the substrings, it just constructs new 'ByteStrings' that
  820 -- are slices of the original.
  821 --
  822 split :: Word8 -> ByteString -> [ByteString]
  823 split _ Empty     = []
  824 split w (Chunk c0 cs0) = comb [] (S.split w c0) cs0
  825 
  826   where comb :: [P.ByteString] -> [P.ByteString] -> ByteString -> [ByteString]
  827         comb acc (s:[]) Empty        = revChunks (s:acc) : []
  828         comb acc (s:[]) (Chunk c cs) = comb (s:acc) (S.split w c) cs
  829         comb acc (s:ss) cs           = revChunks (s:acc) : comb [] ss cs
  830 {-# INLINE split #-}
  831 
  832 -- | The 'group' function takes a ByteString and returns a list of
  833 -- ByteStrings such that the concatenation of the result is equal to the
  834 -- argument.  Moreover, each sublist in the result contains only equal
  835 -- elements.  For example,
  836 --
  837 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
  838 --
  839 -- It is a special case of 'groupBy', which allows the programmer to
  840 -- supply their own equality test.
  841 group :: ByteString -> [ByteString]
  842 group = go
  843   where
  844     go Empty        = []
  845     go (Chunk c cs)
  846       | S.length c == 1  = to [c] (S.unsafeHead c) cs
  847       | otherwise        = to [S.unsafeTake 1 c] (S.unsafeHead c) (Chunk (S.unsafeTail c) cs)
  848 
  849     to acc !_ Empty        = revNonEmptyChunks acc : []
  850     to acc !w (Chunk c cs) =
  851       case findIndexOrEnd (/= w) c of
  852         0                    -> revNonEmptyChunks acc
  853                               : go (Chunk c cs)
  854         n | n == S.length c  -> to (S.unsafeTake n c : acc) w cs
  855           | otherwise        -> revNonEmptyChunks (S.unsafeTake n c : acc)
  856                               : go (Chunk (S.unsafeDrop n c) cs)
  857 
  858 -- | The 'groupBy' function is the non-overloaded version of 'group'.
  859 --
  860 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
  861 groupBy k = go
  862   where
  863     go Empty        = []
  864     go (Chunk c cs)
  865       | S.length c == 1  = to [c] (S.unsafeHead c) cs
  866       | otherwise        = to [S.unsafeTake 1 c] (S.unsafeHead c) (Chunk (S.unsafeTail c) cs)
  867 
  868     to acc !_ Empty        = revNonEmptyChunks acc : []
  869     to acc !w (Chunk c cs) =
  870       case findIndexOrEnd (not . k w) c of
  871         0                    -> revNonEmptyChunks acc
  872                               : go (Chunk c cs)
  873         n | n == S.length c  -> to (S.unsafeTake n c : acc) w cs
  874           | otherwise        -> revNonEmptyChunks (S.unsafeTake n c : acc)
  875                               : go (Chunk (S.unsafeDrop n c) cs)
  876 
  877 -- | /O(n)/ The 'intercalate' function takes a 'ByteString' and a list of
  878 -- 'ByteString's and concatenates the list after interspersing the first
  879 -- argument between each element of the list.
  880 intercalate :: ByteString -> [ByteString] -> ByteString
  881 intercalate s = concat . (L.intersperse s)
  882 
  883 -- ---------------------------------------------------------------------
  884 -- Indexing ByteStrings
  885 
  886 -- | /O(c)/ 'ByteString' index (subscript) operator, starting from 0.
  887 index :: ByteString -> Int64 -> Word8
  888 index _  i | i < 0  = moduleError "index" ("negative index: " ++ show i)
  889 index cs0 i         = index' cs0 i
  890   where index' Empty     n = moduleError "index" ("index too large: " ++ show n)
  891         index' (Chunk c cs) n
  892           | n >= fromIntegral (S.length c) =
  893               index' cs (n - fromIntegral (S.length c))
  894           | otherwise       = S.unsafeIndex c (fromIntegral n)
  895 
  896 -- | /O(n)/ The 'elemIndex' function returns the index of the first
  897 -- element in the given 'ByteString' which is equal to the query
  898 -- element, or 'Nothing' if there is no such element.
  899 -- This implementation uses memchr(3).
  900 elemIndex :: Word8 -> ByteString -> Maybe Int64
  901 elemIndex w cs0 = elemIndex' 0 cs0
  902   where elemIndex' _ Empty        = Nothing
  903         elemIndex' n (Chunk c cs) =
  904           case S.elemIndex w c of
  905             Nothing -> elemIndex' (n + fromIntegral (S.length c)) cs
  906             Just i  -> Just (n + fromIntegral i)
  907 
  908 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
  909 -- element in the given 'ByteString' which is equal to the query
  910 -- element, or 'Nothing' if there is no such element. The following
  911 -- holds:
  912 --
  913 -- > elemIndexEnd c xs ==
  914 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
  915 
  916 elemIndexEnd :: Word8 -> ByteString -> Maybe Int64
  917 elemIndexEnd w = elemIndexEnd' 0
  918   where
  919     elemIndexEnd' _ Empty = Nothing
  920     elemIndexEnd' n (Chunk c cs) =
  921       let !n' = n + S.length c
  922           !i  = fmap (fromIntegral . (n +)) $ S.elemIndexEnd w c
  923       in elemIndexEnd' n' cs `mplus` i
  924 
  925 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
  926 -- the indices of all elements equal to the query element, in ascending order.
  927 -- This implementation uses memchr(3).
  928 elemIndices :: Word8 -> ByteString -> [Int64]
  929 elemIndices w cs0 = elemIndices' 0 cs0
  930   where elemIndices' _ Empty        = []
  931         elemIndices' n (Chunk c cs) = L.map ((+n).fromIntegral) (S.elemIndices w c)
  932                              ++ elemIndices' (n + fromIntegral (S.length c)) cs
  933 
  934 -- | count returns the number of times its argument appears in the ByteString
  935 --
  936 -- > count = length . elemIndices
  937 --
  938 -- But more efficiently than using length on the intermediate list.
  939 count :: Word8 -> ByteString -> Int64
  940 count w cs = foldlChunks (\n c -> n + fromIntegral (S.count w c)) 0 cs
  941 
  942 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
  943 -- returns the index of the first element in the ByteString
  944 -- satisfying the predicate.
  945 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int64
  946 findIndex k cs0 = findIndex' 0 cs0
  947   where findIndex' _ Empty        = Nothing
  948         findIndex' n (Chunk c cs) =
  949           case S.findIndex k c of
  950             Nothing -> findIndex' (n + fromIntegral (S.length c)) cs
  951             Just i  -> Just (n + fromIntegral i)
  952 {-# INLINE findIndex #-}
  953 
  954 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
  955 -- and returns the first element in matching the predicate, or 'Nothing'
  956 -- if there is no such element.
  957 --
  958 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
  959 --
  960 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
  961 find f cs0 = find' cs0
  962   where find' Empty        = Nothing
  963         find' (Chunk c cs) = case S.find f c of
  964             Nothing -> find' cs
  965             Just w  -> Just w
  966 {-# INLINE find #-}
  967 
  968 -- | The 'findIndices' function extends 'findIndex', by returning the
  969 -- indices of all elements satisfying the predicate, in ascending order.
  970 findIndices :: (Word8 -> Bool) -> ByteString -> [Int64]
  971 findIndices k cs0 = findIndices' 0 cs0
  972   where findIndices' _ Empty        = []
  973         findIndices' n (Chunk c cs) = L.map ((+n).fromIntegral) (S.findIndices k c)
  974                              ++ findIndices' (n + fromIntegral (S.length c)) cs
  975 
  976 -- ---------------------------------------------------------------------
  977 -- Searching ByteStrings
  978 
  979 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
  980 elem :: Word8 -> ByteString -> Bool
  981 elem w cs = case elemIndex w cs of Nothing -> False ; _ -> True
  982 
  983 -- | /O(n)/ 'notElem' is the inverse of 'elem'
  984 notElem :: Word8 -> ByteString -> Bool
  985 notElem w cs = not (elem w cs)
  986 
  987 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
  988 -- returns a ByteString containing those characters that satisfy the
  989 -- predicate.
  990 filter :: (Word8 -> Bool) -> ByteString -> ByteString
  991 filter p s = go s
  992     where
  993         go Empty        = Empty
  994         go (Chunk x xs) = chunk (S.filter p x) (go xs)
  995 {-# INLINE filter #-}
  996 
  997 {-
  998 -- | /O(n)/ and /O(n\/c) space/ A first order equivalent of /filter .
  999 -- (==)/, for the common case of filtering a single byte. It is more
 1000 -- efficient to use /filterByte/ in this case.
 1001 --
 1002 -- > filterByte == filter . (==)
 1003 --
 1004 -- filterByte is around 10x faster, and uses much less space, than its
 1005 -- filter equivalent
 1006 filterByte :: Word8 -> ByteString -> ByteString
 1007 filterByte w ps = replicate (count w ps) w
 1008 {-# INLINE filterByte #-}
 1009 
 1010 {-# RULES
 1011 "ByteString specialise filter (== x)" forall x.
 1012   filter ((==) x) = filterByte x
 1013 
 1014 "ByteString specialise filter (== x)" forall x.
 1015  filter (== x) = filterByte x
 1016   #-}
 1017 -}
 1018 
 1019 {-
 1020 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
 1021 -- case of filtering a single byte out of a list. It is more efficient
 1022 -- to use /filterNotByte/ in this case.
 1023 --
 1024 -- > filterNotByte == filter . (/=)
 1025 --
 1026 -- filterNotByte is around 2x faster than its filter equivalent.
 1027 filterNotByte :: Word8 -> ByteString -> ByteString
 1028 filterNotByte w (LPS xs) = LPS (filterMap (P.filterNotByte w) xs)
 1029 -}
 1030 
 1031 -- | /O(n)/ The 'partition' function takes a predicate a ByteString and returns
 1032 -- the pair of ByteStrings with elements which do and do not satisfy the
 1033 -- predicate, respectively; i.e.,
 1034 --
 1035 -- > partition p bs == (filter p xs, filter (not . p) xs)
 1036 --
 1037 partition :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 1038 partition _ Empty = (Empty, Empty)
 1039 partition p (Chunk x xs) = (chunk t ts, chunk f fs)
 1040   where
 1041     (t,   f) = S.partition p x
 1042     (ts, fs) = partition   p xs
 1043 
 1044 -- ---------------------------------------------------------------------
 1045 -- Searching for substrings
 1046 
 1047 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
 1048 -- iff the first is a prefix of the second.
 1049 isPrefixOf :: ByteString -> ByteString -> Bool
 1050 isPrefixOf Empty _  = True
 1051 isPrefixOf _ Empty  = False
 1052 isPrefixOf (Chunk x xs) (Chunk y ys)
 1053     | S.length x == S.length y = x == y  && isPrefixOf xs ys
 1054     | S.length x <  S.length y = x == yh && isPrefixOf xs (Chunk yt ys)
 1055     | otherwise                = xh == y && isPrefixOf (Chunk xt xs) ys
 1056   where (xh,xt) = S.splitAt (S.length y) x
 1057         (yh,yt) = S.splitAt (S.length x) y
 1058 
 1059 -- | /O(n)/ The 'stripPrefix' function takes two ByteStrings and returns 'Just'
 1060 -- the remainder of the second iff the first is its prefix, and otherwise
 1061 -- 'Nothing'.
 1062 stripPrefix :: ByteString -> ByteString -> Maybe ByteString
 1063 stripPrefix Empty bs  = Just bs
 1064 stripPrefix _ Empty  = Nothing
 1065 stripPrefix (Chunk x xs) (Chunk y ys)
 1066     | S.length x == S.length y = if x == y then stripPrefix xs ys else Nothing
 1067     | S.length x <  S.length y = do yt <- S.stripPrefix x y
 1068                                     stripPrefix xs (Chunk yt ys)
 1069     | otherwise                = do xt <- S.stripPrefix y x
 1070                                     stripPrefix (Chunk xt xs) ys
 1071 
 1072 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
 1073 -- iff the first is a suffix of the second.
 1074 --
 1075 -- The following holds:
 1076 --
 1077 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
 1078 --
 1079 isSuffixOf :: ByteString -> ByteString -> Bool
 1080 isSuffixOf x y = reverse x `isPrefixOf` reverse y
 1081 --TODO: a better implementation
 1082 
 1083 -- | /O(n)/ The 'stripSuffix' function takes two ByteStrings and returns 'Just'
 1084 -- the remainder of the second iff the first is its suffix, and otherwise
 1085 -- 'Nothing'.
 1086 stripSuffix :: ByteString -> ByteString -> Maybe ByteString
 1087 stripSuffix x y = reverse <$> stripPrefix (reverse x) (reverse y)
 1088 --TODO: a better implementation
 1089 
 1090 -- ---------------------------------------------------------------------
 1091 -- Zipping
 1092 
 1093 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
 1094 -- corresponding pairs of bytes. If one input ByteString is short,
 1095 -- excess elements of the longer ByteString are discarded. This is
 1096 -- equivalent to a pair of 'unpack' operations.
 1097 zip :: ByteString -> ByteString -> [(Word8,Word8)]
 1098 zip = zipWith (,)
 1099 
 1100 -- | 'zipWith' generalises 'zip' by zipping with the function given as
 1101 -- the first argument, instead of a tupling function.  For example,
 1102 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
 1103 -- corresponding sums.
 1104 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
 1105 zipWith _ Empty     _  = []
 1106 zipWith _ _      Empty = []
 1107 zipWith f (Chunk a as) (Chunk b bs) = go a as b bs
 1108   where
 1109     go x xs y ys = f (S.unsafeHead x) (S.unsafeHead y)
 1110                  : to (S.unsafeTail x) xs (S.unsafeTail y) ys
 1111 
 1112     to x Empty         _ _             | S.null x       = []
 1113     to _ _             y Empty         | S.null y       = []
 1114     to x xs            y ys            | not (S.null x)
 1115                                       && not (S.null y) = go x  xs y  ys
 1116     to x xs            _ (Chunk y' ys) | not (S.null x) = go x  xs y' ys
 1117     to _ (Chunk x' xs) y ys            | not (S.null y) = go x' xs y  ys
 1118     to _ (Chunk x' xs) _ (Chunk y' ys)                  = go x' xs y' ys
 1119 
 1120 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
 1121 -- ByteStrings. Note that this performs two 'pack' operations.
 1122 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
 1123 unzip ls = (pack (L.map fst ls), pack (L.map snd ls))
 1124 {-# INLINE unzip #-}
 1125 
 1126 -- ---------------------------------------------------------------------
 1127 -- Special lists
 1128 
 1129 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
 1130 inits :: ByteString -> [ByteString]
 1131 inits = (Empty :) . inits'
 1132   where inits' Empty        = []
 1133         inits' (Chunk c cs) = L.map (\c' -> Chunk c' Empty) (L.tail (S.inits c))
 1134                            ++ L.map (Chunk c) (inits' cs)
 1135 
 1136 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
 1137 tails :: ByteString -> [ByteString]
 1138 tails Empty         = Empty : []
 1139 tails cs@(Chunk c cs')
 1140   | S.length c == 1 = cs : tails cs'
 1141   | otherwise       = cs : tails (Chunk (S.unsafeTail c) cs')
 1142 
 1143 -- ---------------------------------------------------------------------
 1144 -- Low level constructors
 1145 
 1146 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
 1147 --   This is mainly useful to allow the rest of the data pointed
 1148 --   to by the 'ByteString' to be garbage collected, for example
 1149 --   if a large string has been read in, and only a small part of it
 1150 --   is needed in the rest of the program.
 1151 copy :: ByteString -> ByteString
 1152 copy cs = foldrChunks (Chunk . S.copy) Empty cs
 1153 --TODO, we could coalese small blocks here
 1154 --FIXME: probably not strict enough, if we're doing this to avoid retaining
 1155 -- the parent blocks then we'd better copy strictly.
 1156 
 1157 -- ---------------------------------------------------------------------
 1158 
 1159 -- TODO defrag func that concatenates block together that are below a threshold
 1160 -- defrag :: ByteString -> ByteString
 1161 
 1162 -- ---------------------------------------------------------------------
 1163 -- Lazy ByteString IO
 1164 --
 1165 -- Rule for when to close: is it expected to read the whole file?
 1166 -- If so, close when done.
 1167 --
 1168 
 1169 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
 1170 -- are read on demand, in at most @k@-sized chunks. It does not block
 1171 -- waiting for a whole @k@-sized chunk, so if less than @k@ bytes are
 1172 -- available then they will be returned immediately as a smaller chunk.
 1173 --
 1174 -- The handle is closed on EOF.
 1175 --
 1176 -- Note: the 'Handle' should be placed in binary mode with
 1177 -- 'System.IO.hSetBinaryMode' for 'hGetContentsN' to
 1178 -- work correctly.
 1179 --
 1180 hGetContentsN :: Int -> Handle -> IO ByteString
 1181 hGetContentsN k h = lazyRead -- TODO close on exceptions
 1182   where
 1183     lazyRead = unsafeInterleaveIO loop
 1184 
 1185     loop = do
 1186         c <- S.hGetSome h k -- only blocks if there is no data available
 1187         if S.null c
 1188           then do hClose h >> return Empty
 1189           else do cs <- lazyRead
 1190                   return (Chunk c cs)
 1191 
 1192 -- | Read @n@ bytes into a 'ByteString', directly from the
 1193 -- specified 'Handle', in chunks of size @k@.
 1194 --
 1195 hGetN :: Int -> Handle -> Int -> IO ByteString
 1196 hGetN k h n | n > 0 = readChunks n
 1197   where
 1198     readChunks !i = do
 1199         c <- S.hGet h (min k i)
 1200         case S.length c of
 1201             0 -> return Empty
 1202             m -> do cs <- readChunks (i - m)
 1203                     return (Chunk c cs)
 1204 
 1205 hGetN _ _ 0 = return Empty
 1206 hGetN _ h n = illegalBufferSize h "hGet" n
 1207 
 1208 -- | hGetNonBlockingN is similar to 'hGetContentsN', except that it will never block
 1209 -- waiting for data to become available, instead it returns only whatever data
 1210 -- is available. Chunks are read on demand, in @k@-sized chunks.
 1211 --
 1212 hGetNonBlockingN :: Int -> Handle -> Int -> IO ByteString
 1213 hGetNonBlockingN k h n | n > 0= readChunks n
 1214   where
 1215     readChunks !i = do
 1216         c <- S.hGetNonBlocking h (min k i)
 1217         case S.length c of
 1218             0 -> return Empty
 1219             m -> do cs <- readChunks (i - m)
 1220                     return (Chunk c cs)
 1221 
 1222 hGetNonBlockingN _ _ 0 = return Empty
 1223 hGetNonBlockingN _ h n = illegalBufferSize h "hGetNonBlocking" n
 1224 
 1225 illegalBufferSize :: Handle -> String -> Int -> IO a
 1226 illegalBufferSize handle fn sz =
 1227     ioError (mkIOError illegalOperationErrorType msg (Just handle) Nothing)
 1228     --TODO: System.IO uses InvalidArgument here, but it's not exported :-(
 1229     where
 1230       msg = fn ++ ": illegal ByteString size " ++ showsPrec 9 sz []
 1231 
 1232 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
 1233 -- are read on demand, using the default chunk size.
 1234 --
 1235 -- Once EOF is encountered, the Handle is closed.
 1236 --
 1237 -- Note: the 'Handle' should be placed in binary mode with
 1238 -- 'System.IO.hSetBinaryMode' for 'hGetContents' to
 1239 -- work correctly.
 1240 --
 1241 hGetContents :: Handle -> IO ByteString
 1242 hGetContents = hGetContentsN defaultChunkSize
 1243 
 1244 -- | Read @n@ bytes into a 'ByteString', directly from the specified 'Handle'.
 1245 --
 1246 hGet :: Handle -> Int -> IO ByteString
 1247 hGet = hGetN defaultChunkSize
 1248 
 1249 -- | hGetNonBlocking is similar to 'hGet', except that it will never block
 1250 -- waiting for data to become available, instead it returns only whatever data
 1251 -- is available.  If there is no data available to be read, 'hGetNonBlocking'
 1252 -- returns 'empty'.
 1253 --
 1254 -- Note: on Windows and with Haskell implementation other than GHC, this
 1255 -- function does not work correctly; it behaves identically to 'hGet'.
 1256 --
 1257 hGetNonBlocking :: Handle -> Int -> IO ByteString
 1258 hGetNonBlocking = hGetNonBlockingN defaultChunkSize
 1259 
 1260 -- | Read an entire file /lazily/ into a 'ByteString'.
 1261 -- The Handle will be held open until EOF is encountered.
 1262 --
 1263 readFile :: FilePath -> IO ByteString
 1264 readFile f = openBinaryFile f ReadMode >>= hGetContents
 1265 
 1266 -- | Write a 'ByteString' to a file.
 1267 --
 1268 writeFile :: FilePath -> ByteString -> IO ()
 1269 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose
 1270     (\hdl -> hPut hdl txt)
 1271 
 1272 -- | Append a 'ByteString' to a file.
 1273 --
 1274 appendFile :: FilePath -> ByteString -> IO ()
 1275 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
 1276     (\hdl -> hPut hdl txt)
 1277 
 1278 -- | getContents. Equivalent to hGetContents stdin. Will read /lazily/
 1279 --
 1280 getContents :: IO ByteString
 1281 getContents = hGetContents stdin
 1282 
 1283 -- | Outputs a 'ByteString' to the specified 'Handle'.
 1284 --
 1285 hPut :: Handle -> ByteString -> IO ()
 1286 hPut h cs = foldrChunks (\c rest -> S.hPut h c >> rest) (return ()) cs
 1287 
 1288 -- | Similar to 'hPut' except that it will never block. Instead it returns
 1289 -- any tail that did not get written. This tail may be 'empty' in the case that
 1290 -- the whole string was written, or the whole original string if nothing was
 1291 -- written. Partial writes are also possible.
 1292 --
 1293 -- Note: on Windows and with Haskell implementation other than GHC, this
 1294 -- function does not work correctly; it behaves identically to 'hPut'.
 1295 --
 1296 hPutNonBlocking :: Handle -> ByteString -> IO ByteString
 1297 hPutNonBlocking _ Empty           = return Empty
 1298 hPutNonBlocking h bs@(Chunk c cs) = do
 1299   c' <- S.hPutNonBlocking h c
 1300   case S.length c' of
 1301     l' | l' == S.length c -> hPutNonBlocking h cs
 1302     0                     -> return bs
 1303     _                     -> return (Chunk c' cs)
 1304 
 1305 -- | A synonym for @hPut@, for compatibility
 1306 --
 1307 hPutStr :: Handle -> ByteString -> IO ()
 1308 hPutStr = hPut
 1309 
 1310 -- | Write a ByteString to stdout
 1311 putStr :: ByteString -> IO ()
 1312 putStr = hPut stdout
 1313 
 1314 -- | Write a ByteString to stdout, appending a newline byte
 1315 --
 1316 putStrLn :: ByteString -> IO ()
 1317 putStrLn ps = hPut stdout ps >> hPut stdout (singleton 0x0a)
 1318 
 1319 {-# DEPRECATED putStrLn
 1320     "Use Data.ByteString.Lazy.Char8.putStrLn instead. (Functions that rely on ASCII encodings belong in Data.ByteString.Lazy.Char8)"
 1321   #-}
 1322 
 1323 -- | The interact function takes a function of type @ByteString -> ByteString@
 1324 -- as its argument. The entire input from the standard input device is passed
 1325 -- to this function as its argument, and the resulting string is output on the
 1326 -- standard output device.
 1327 --
 1328 interact :: (ByteString -> ByteString) -> IO ()
 1329 interact transformer = putStr . transformer =<< getContents
 1330 
 1331 -- ---------------------------------------------------------------------
 1332 -- Internal utilities
 1333 
 1334 -- Common up near identical calls to `error' to reduce the number
 1335 -- constant strings created when compiled:
 1336 errorEmptyList :: String -> a
 1337 errorEmptyList fun = moduleError fun "empty ByteString"
 1338 {-# NOINLINE errorEmptyList #-}
 1339 
 1340 moduleError :: String -> String -> a
 1341 moduleError fun msg = error ("Data.ByteString.Lazy." ++ fun ++ ':':' ':msg)
 1342 {-# NOINLINE moduleError #-}
 1343 
 1344 
 1345 -- reverse a list of non-empty chunks into a lazy ByteString
 1346 revNonEmptyChunks :: [P.ByteString] -> ByteString
 1347 revNonEmptyChunks cs = L.foldl' (flip Chunk) Empty cs
 1348 
 1349 -- reverse a list of possibly-empty chunks into a lazy ByteString
 1350 revChunks :: [P.ByteString] -> ByteString
 1351 revChunks cs = L.foldl' (flip chunk) Empty cs
 1352 
 1353 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
 1354 -- of the string if no element is found, rather than Nothing.
 1355 findIndexOrEnd :: (Word8 -> Bool) -> P.ByteString -> Int
 1356 findIndexOrEnd k (S.PS x s l) =
 1357     S.accursedUnutterablePerformIO $
 1358       withForeignPtr x $ \f -> go (f `plusPtr` s) 0
 1359   where
 1360     go !ptr !n | n >= l    = return l
 1361                | otherwise = do w <- peek ptr
 1362                                 if k w
 1363                                   then return n
 1364                                   else go (ptr `plusPtr` 1) (n+1)
 1365 {-# INLINE findIndexOrEnd #-}