1 {-# LANGUAGE CPP #-}
    2 {-# LANGUAGE MagicHash, UnboxedTuples,
    3             NamedFieldPuns, BangPatterns #-}
    4 {-# OPTIONS_HADDOCK prune #-}
    5 #if __GLASGOW_HASKELL__ >= 701
    6 {-# LANGUAGE Trustworthy #-}
    7 #endif
    8 
    9 -- |
   10 -- Module      : Data.ByteString
   11 -- Copyright   : (c) The University of Glasgow 2001,
   12 --               (c) David Roundy 2003-2005,
   13 --               (c) Simon Marlow 2005,
   14 --               (c) Bjorn Bringert 2006,
   15 --               (c) Don Stewart 2005-2008,
   16 --               (c) Duncan Coutts 2006-2013
   17 -- License     : BSD-style
   18 --
   19 -- Maintainer  : dons00@gmail.com, duncan@community.haskell.org
   20 -- Stability   : stable
   21 -- Portability : portable
   22 --
   23 -- A time and space-efficient implementation of byte vectors using
   24 -- packed Word8 arrays, suitable for high performance use, both in terms
   25 -- of large data quantities, or high speed requirements. Byte vectors
   26 -- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr',
   27 -- and can be passed between C and Haskell with little effort.
   28 --
   29 -- The recomended way to assemble ByteStrings from smaller parts
   30 -- is to use the builder monoid from "Data.ByteString.Builder".
   31 --
   32 -- This module is intended to be imported @qualified@, to avoid name
   33 -- clashes with "Prelude" functions.  eg.
   34 --
   35 -- > import qualified Data.ByteString as B
   36 --
   37 -- Original GHC implementation by Bryan O\'Sullivan.
   38 -- Rewritten to use 'Data.Array.Unboxed.UArray' by Simon Marlow.
   39 -- Rewritten to support slices and use 'ForeignPtr' by David Roundy.
   40 -- Rewritten again and extended by Don Stewart and Duncan Coutts.
   41 --
   42 
   43 module Data.ByteString (
   44 
   45         -- * The @ByteString@ type
   46         ByteString,             -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid
   47 
   48         -- * Introducing and eliminating 'ByteString's
   49         empty,                  -- :: ByteString
   50         singleton,              -- :: Word8   -> ByteString
   51         pack,                   -- :: [Word8] -> ByteString
   52         unpack,                 -- :: ByteString -> [Word8]
   53 
   54         -- * Basic interface
   55         cons,                   -- :: Word8 -> ByteString -> ByteString
   56         snoc,                   -- :: ByteString -> Word8 -> ByteString
   57         append,                 -- :: ByteString -> ByteString -> ByteString
   58         head,                   -- :: ByteString -> Word8
   59         uncons,                 -- :: ByteString -> Maybe (Word8, ByteString)
   60         unsnoc,                 -- :: ByteString -> Maybe (ByteString, Word8)
   61         last,                   -- :: ByteString -> Word8
   62         tail,                   -- :: ByteString -> ByteString
   63         init,                   -- :: ByteString -> ByteString
   64         null,                   -- :: ByteString -> Bool
   65         length,                 -- :: ByteString -> Int
   66 
   67         -- * Transforming ByteStrings
   68         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
   69         reverse,                -- :: ByteString -> ByteString
   70         intersperse,            -- :: Word8 -> ByteString -> ByteString
   71         intercalate,            -- :: ByteString -> [ByteString] -> ByteString
   72         transpose,              -- :: [ByteString] -> [ByteString]
   73 
   74         -- * Reducing 'ByteString's (folds)
   75         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
   76         foldl',                 -- :: (a -> Word8 -> a) -> a -> ByteString -> a
   77         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
   78         foldl1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
   79 
   80         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
   81         foldr',                 -- :: (Word8 -> a -> a) -> a -> ByteString -> a
   82         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
   83         foldr1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
   84 
   85         -- ** Special folds
   86         concat,                 -- :: [ByteString] -> ByteString
   87         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
   88         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
   89         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
   90         maximum,                -- :: ByteString -> Word8
   91         minimum,                -- :: ByteString -> Word8
   92 
   93         -- * Building ByteStrings
   94         -- ** Scans
   95         scanl,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
   96         scanl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
   97         scanr,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
   98         scanr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
   99 
  100         -- ** Accumulating maps
  101         mapAccumL,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  102         mapAccumR,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  103 
  104         -- ** Generating and unfolding ByteStrings
  105         replicate,              -- :: Int -> Word8 -> ByteString
  106         unfoldr,                -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
  107         unfoldrN,               -- :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
  108 
  109         -- * Substrings
  110 
  111         -- ** Breaking strings
  112         take,                   -- :: Int -> ByteString -> ByteString
  113         drop,                   -- :: Int -> ByteString -> ByteString
  114         splitAt,                -- :: Int -> ByteString -> (ByteString, ByteString)
  115         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
  116         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
  117         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  118         spanEnd,                -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  119         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  120         breakEnd,               -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  121         group,                  -- :: ByteString -> [ByteString]
  122         groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
  123         inits,                  -- :: ByteString -> [ByteString]
  124         tails,                  -- :: ByteString -> [ByteString]
  125         stripPrefix,            -- :: ByteString -> ByteString -> Maybe ByteString
  126         stripSuffix,            -- :: ByteString -> ByteString -> Maybe ByteString
  127 
  128         -- ** Breaking into many substrings
  129         split,                  -- :: Word8 -> ByteString -> [ByteString]
  130         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
  131 
  132         -- * Predicates
  133         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
  134         isSuffixOf,             -- :: ByteString -> ByteString -> Bool
  135         isInfixOf,              -- :: ByteString -> ByteString -> Bool
  136 
  137         -- ** Search for arbitrary substrings
  138         breakSubstring,         -- :: ByteString -> ByteString -> (ByteString,ByteString)
  139         findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
  140         findSubstrings,         -- :: ByteString -> ByteString -> [Int]
  141 
  142         -- * Searching ByteStrings
  143 
  144         -- ** Searching by equality
  145         elem,                   -- :: Word8 -> ByteString -> Bool
  146         notElem,                -- :: Word8 -> ByteString -> Bool
  147 
  148         -- ** Searching with a predicate
  149         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
  150         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
  151         partition,              -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  152 
  153         -- * Indexing ByteStrings
  154         index,                  -- :: ByteString -> Int -> Word8
  155         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int
  156         elemIndices,            -- :: Word8 -> ByteString -> [Int]
  157         elemIndexEnd,           -- :: Word8 -> ByteString -> Maybe Int
  158         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
  159         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int]
  160         count,                  -- :: Word8 -> ByteString -> Int
  161 
  162         -- * Zipping and unzipping ByteStrings
  163         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
  164         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
  165         unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
  166 
  167         -- * Ordered ByteStrings
  168         sort,                   -- :: ByteString -> ByteString
  169 
  170         -- * Low level conversions
  171         -- ** Copying ByteStrings
  172         copy,                   -- :: ByteString -> ByteString
  173 
  174         -- ** Packing 'CString's and pointers
  175         packCString,            -- :: CString -> IO ByteString
  176         packCStringLen,         -- :: CStringLen -> IO ByteString
  177 
  178         -- ** Using ByteStrings as 'CString's
  179         useAsCString,           -- :: ByteString -> (CString    -> IO a) -> IO a
  180         useAsCStringLen,        -- :: ByteString -> (CStringLen -> IO a) -> IO a
  181 
  182         -- * I\/O with 'ByteString's
  183 
  184         -- ** Standard input and output
  185         getLine,                -- :: IO ByteString
  186         getContents,            -- :: IO ByteString
  187         putStr,                 -- :: ByteString -> IO ()
  188         putStrLn,               -- :: ByteString -> IO ()
  189         interact,               -- :: (ByteString -> ByteString) -> IO ()
  190 
  191         -- ** Files
  192         readFile,               -- :: FilePath -> IO ByteString
  193         writeFile,              -- :: FilePath -> ByteString -> IO ()
  194         appendFile,             -- :: FilePath -> ByteString -> IO ()
  195 
  196         -- ** I\/O with Handles
  197         hGetLine,               -- :: Handle -> IO ByteString
  198         hGetContents,           -- :: Handle -> IO ByteString
  199         hGet,                   -- :: Handle -> Int -> IO ByteString
  200         hGetSome,               -- :: Handle -> Int -> IO ByteString
  201         hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
  202         hPut,                   -- :: Handle -> ByteString -> IO ()
  203         hPutNonBlocking,        -- :: Handle -> ByteString -> IO ByteString
  204         hPutStr,                -- :: Handle -> ByteString -> IO ()
  205         hPutStrLn,              -- :: Handle -> ByteString -> IO ()
  206 
  207         breakByte
  208 
  209   ) where
  210 
  211 import qualified Prelude as P
  212 import Prelude hiding           (reverse,head,tail,last,init,null
  213                                 ,length,map,lines,foldl,foldr,unlines
  214                                 ,concat,any,take,drop,splitAt,takeWhile
  215                                 ,dropWhile,span,break,elem,filter,maximum
  216                                 ,minimum,all,concatMap,foldl1,foldr1
  217                                 ,scanl,scanl1,scanr,scanr1
  218                                 ,readFile,writeFile,appendFile,replicate
  219                                 ,getContents,getLine,putStr,putStrLn,interact
  220                                 ,zip,zipWith,unzip,notElem)
  221 
  222 #if MIN_VERSION_base(4,7,0)
  223 import Data.Bits                (finiteBitSize, shiftL, (.|.), (.&.))
  224 #else
  225 import Data.Bits                (bitSize, shiftL, (.|.), (.&.))
  226 #endif
  227 
  228 import Data.ByteString.Internal
  229 import Data.ByteString.Unsafe
  230 
  231 import qualified Data.List as List
  232 
  233 import Data.Word                (Word8)
  234 import Data.Maybe               (isJust)
  235 
  236 import Control.Exception        (finally, bracket, assert, throwIO)
  237 import Control.Monad            (when)
  238 
  239 import Foreign.C.String         (CString, CStringLen)
  240 import Foreign.C.Types          (CSize)
  241 import Foreign.ForeignPtr       (ForeignPtr, withForeignPtr, touchForeignPtr)
  242 #if MIN_VERSION_base(4,5,0)
  243 import Foreign.ForeignPtr.Unsafe(unsafeForeignPtrToPtr)
  244 #else
  245 import Foreign.ForeignPtr       (unsafeForeignPtrToPtr)
  246 #endif
  247 import Foreign.Marshal.Alloc    (allocaBytes)
  248 import Foreign.Marshal.Array    (allocaArray)
  249 import Foreign.Ptr
  250 import Foreign.Storable         (Storable(..))
  251 
  252 -- hGetBuf and hPutBuf not available in yhc or nhc
  253 import System.IO                (stdin,stdout,hClose,hFileSize
  254                                 ,hGetBuf,hPutBuf,openBinaryFile
  255                                 ,IOMode(..))
  256 import System.IO.Error          (mkIOError, illegalOperationErrorType)
  257 
  258 #if !(MIN_VERSION_base(4,8,0))
  259 import Data.Monoid              (Monoid(..))
  260 #endif
  261 
  262 
  263 import System.IO                (hGetBufNonBlocking, hPutBufNonBlocking)
  264 
  265 #if MIN_VERSION_base(4,3,0)
  266 import System.IO                (hGetBufSome)
  267 #else
  268 import System.IO                (hWaitForInput, hIsEOF)
  269 #endif
  270 
  271 import Data.IORef
  272 import GHC.IO.Handle.Internals
  273 import GHC.IO.Handle.Types
  274 import GHC.IO.Buffer
  275 import GHC.IO.BufferedIO as Buffered
  276 import GHC.IO                   (unsafePerformIO, unsafeDupablePerformIO)
  277 import Data.Char                (ord)
  278 import Foreign.Marshal.Utils    (copyBytes)
  279 
  280 import GHC.Prim                 (Word#)
  281 import GHC.Base                 (build)
  282 import GHC.Word hiding (Word8)
  283 
  284 #if !(MIN_VERSION_base(4,7,0))
  285 finiteBitSize = bitSize
  286 #endif
  287 
  288 -- -----------------------------------------------------------------------------
  289 -- Introducing and eliminating 'ByteString's
  290 
  291 -- | /O(1)/ The empty 'ByteString'
  292 empty :: ByteString
  293 empty = PS nullForeignPtr 0 0
  294 
  295 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
  296 singleton :: Word8 -> ByteString
  297 singleton c = unsafeCreate 1 $ \p -> poke p c
  298 {-# INLINE [1] singleton #-}
  299 
  300 -- Inline [1] for intercalate rule
  301 
  302 --
  303 -- XXX The use of unsafePerformIO in allocating functions (unsafeCreate) is critical!
  304 --
  305 -- Otherwise:
  306 --
  307 --  singleton 255 `compare` singleton 127
  308 --
  309 -- is compiled to:
  310 --
  311 --  case mallocByteString 2 of
  312 --      ForeignPtr f internals ->
  313 --           case writeWord8OffAddr# f 0 255 of _ ->
  314 --           case writeWord8OffAddr# f 0 127 of _ ->
  315 --           case eqAddr# f f of
  316 --                  False -> case compare (GHC.Prim.plusAddr# f 0)
  317 --                                        (GHC.Prim.plusAddr# f 0)
  318 --
  319 --
  320 
  321 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
  322 --
  323 -- For applications with large numbers of string literals, pack can be a
  324 -- bottleneck. In such cases, consider using packAddress (GHC only).
  325 pack :: [Word8] -> ByteString
  326 pack = packBytes
  327 
  328 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
  329 unpack :: ByteString -> [Word8]
  330 unpack bs = build (unpackFoldr bs)
  331 {-# INLINE unpack #-}
  332 
  333 --
  334 -- Have unpack fuse with good list consumers
  335 --
  336 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
  337 unpackFoldr bs k z = foldr k z bs
  338 {-# INLINE [0] unpackFoldr #-}
  339 
  340 {-# RULES
  341 "ByteString unpack-list" [1]  forall bs .
  342     unpackFoldr bs (:) [] = unpackBytes bs
  343  #-}
  344 
  345 -- ---------------------------------------------------------------------
  346 -- Basic interface
  347 
  348 -- | /O(1)/ Test whether a ByteString is empty.
  349 null :: ByteString -> Bool
  350 null (PS _ _ l) = assert (l >= 0) $ l <= 0
  351 {-# INLINE null #-}
  352 
  353 -- ---------------------------------------------------------------------
  354 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
  355 length :: ByteString -> Int
  356 length (PS _ _ l) = assert (l >= 0) $ l
  357 {-# INLINE length #-}
  358 
  359 ------------------------------------------------------------------------
  360 
  361 infixr 5 `cons` --same as list (:)
  362 infixl 5 `snoc`
  363 
  364 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
  365 -- complexity, as it requires making a copy.
  366 cons :: Word8 -> ByteString -> ByteString
  367 cons c (PS x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
  368         poke p c
  369         memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
  370 {-# INLINE cons #-}
  371 
  372 -- | /O(n)/ Append a byte to the end of a 'ByteString'
  373 snoc :: ByteString -> Word8 -> ByteString
  374 snoc (PS x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
  375         memcpy p (f `plusPtr` s) (fromIntegral l)
  376         poke (p `plusPtr` l) c
  377 {-# INLINE snoc #-}
  378 
  379 -- todo fuse
  380 
  381 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
  382 -- An exception will be thrown in the case of an empty ByteString.
  383 head :: ByteString -> Word8
  384 head (PS x s l)
  385     | l <= 0    = errorEmptyList "head"
  386     | otherwise = accursedUnutterablePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
  387 {-# INLINE head #-}
  388 
  389 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
  390 -- An exception will be thrown in the case of an empty ByteString.
  391 tail :: ByteString -> ByteString
  392 tail (PS p s l)
  393     | l <= 0    = errorEmptyList "tail"
  394     | otherwise = PS p (s+1) (l-1)
  395 {-# INLINE tail #-}
  396 
  397 -- | /O(1)/ Extract the head and tail of a ByteString, returning Nothing
  398 -- if it is empty.
  399 uncons :: ByteString -> Maybe (Word8, ByteString)
  400 uncons (PS x s l)
  401     | l <= 0    = Nothing
  402     | otherwise = Just (accursedUnutterablePerformIO $ withForeignPtr x
  403                                                      $ \p -> peekByteOff p s,
  404                         PS x (s+1) (l-1))
  405 {-# INLINE uncons #-}
  406 
  407 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
  408 -- An exception will be thrown in the case of an empty ByteString.
  409 last :: ByteString -> Word8
  410 last ps@(PS x s l)
  411     | null ps   = errorEmptyList "last"
  412     | otherwise = accursedUnutterablePerformIO $
  413                     withForeignPtr x $ \p -> peekByteOff p (s+l-1)
  414 {-# INLINE last #-}
  415 
  416 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
  417 -- An exception will be thrown in the case of an empty ByteString.
  418 init :: ByteString -> ByteString
  419 init ps@(PS p s l)
  420     | null ps   = errorEmptyList "init"
  421     | otherwise = PS p s (l-1)
  422 {-# INLINE init #-}
  423 
  424 -- | /O(1)/ Extract the 'init' and 'last' of a ByteString, returning Nothing
  425 -- if it is empty.
  426 unsnoc :: ByteString -> Maybe (ByteString, Word8)
  427 unsnoc (PS x s l)
  428     | l <= 0    = Nothing
  429     | otherwise = Just (PS x s (l-1),
  430                         accursedUnutterablePerformIO $
  431                           withForeignPtr x $ \p -> peekByteOff p (s+l-1))
  432 {-# INLINE unsnoc #-}
  433 
  434 -- | /O(n)/ Append two ByteStrings
  435 append :: ByteString -> ByteString -> ByteString
  436 append = mappend
  437 {-# INLINE append #-}
  438 
  439 -- ---------------------------------------------------------------------
  440 -- Transformations
  441 
  442 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
  443 -- element of @xs@.
  444 map :: (Word8 -> Word8) -> ByteString -> ByteString
  445 map f (PS fp s len) = unsafeDupablePerformIO $ withForeignPtr fp $ \a ->
  446     create len $ map_ 0 (a `plusPtr` s)
  447   where
  448     map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
  449     map_ !n !p1 !p2
  450        | n >= len = return ()
  451        | otherwise = do
  452             x <- peekByteOff p1 n
  453             pokeByteOff p2 n (f x)
  454             map_ (n+1) p1 p2
  455 {-# INLINE map #-}
  456 
  457 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
  458 reverse :: ByteString -> ByteString
  459 reverse (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
  460         c_reverse p (f `plusPtr` s) (fromIntegral l)
  461 
  462 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
  463 -- 'ByteString' and \`intersperses\' that byte between the elements of
  464 -- the 'ByteString'.  It is analogous to the intersperse function on
  465 -- Lists.
  466 intersperse :: Word8 -> ByteString -> ByteString
  467 intersperse c ps@(PS x s l)
  468     | length ps < 2  = ps
  469     | otherwise      = unsafeCreate (2*l-1) $ \p -> withForeignPtr x $ \f ->
  470         c_intersperse p (f `plusPtr` s) (fromIntegral l) c
  471 
  472 -- | The 'transpose' function transposes the rows and columns of its
  473 -- 'ByteString' argument.
  474 transpose :: [ByteString] -> [ByteString]
  475 transpose ps = P.map pack (List.transpose (P.map unpack ps))
  476 
  477 -- ---------------------------------------------------------------------
  478 -- Reducing 'ByteString's
  479 
  480 -- | 'foldl', applied to a binary operator, a starting value (typically
  481 -- the left-identity of the operator), and a ByteString, reduces the
  482 -- ByteString using the binary operator, from left to right.
  483 --
  484 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
  485 foldl f z (PS fp off len) =
  486       let p = unsafeForeignPtrToPtr fp
  487        in go (p `plusPtr` (off+len-1)) (p `plusPtr` (off-1))
  488     where
  489       -- not tail recursive; traverses array right to left
  490       go !p !q | p == q    = z
  491                | otherwise = let !x = accursedUnutterablePerformIO $ do
  492                                         x' <- peek p
  493                                         touchForeignPtr fp
  494                                         return x'
  495                              in f (go (p `plusPtr` (-1)) q) x
  496 {-# INLINE foldl #-}
  497 
  498 -- | 'foldl'' is like 'foldl', but strict in the accumulator.
  499 --
  500 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
  501 foldl' f v (PS fp off len) =
  502       accursedUnutterablePerformIO $ withForeignPtr fp $ \p ->
  503         go v (p `plusPtr` off) (p `plusPtr` (off+len))
  504     where
  505       -- tail recursive; traverses array left to right
  506       go !z !p !q | p == q    = return z
  507                   | otherwise = do x <- peek p
  508                                    go (f z x) (p `plusPtr` 1) q
  509 {-# INLINE foldl' #-}
  510 
  511 -- | 'foldr', applied to a binary operator, a starting value
  512 -- (typically the right-identity of the operator), and a ByteString,
  513 -- reduces the ByteString using the binary operator, from right to left.
  514 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
  515 foldr k z (PS fp off len) =
  516       let p = unsafeForeignPtrToPtr fp
  517        in go (p `plusPtr` off) (p `plusPtr` (off+len))
  518     where
  519       -- not tail recursive; traverses array left to right
  520       go !p !q | p == q    = z
  521                | otherwise = let !x = accursedUnutterablePerformIO $ do
  522                                         x' <- peek p
  523                                         touchForeignPtr fp
  524                                         return x'
  525                               in k x (go (p `plusPtr` 1) q)
  526 {-# INLINE foldr #-}
  527 
  528 -- | 'foldr'' is like 'foldr', but strict in the accumulator.
  529 foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a
  530 foldr' k v (PS fp off len) =
  531       accursedUnutterablePerformIO $ withForeignPtr fp $ \p ->
  532         go v (p `plusPtr` (off+len-1)) (p `plusPtr` (off-1))
  533     where
  534       -- tail recursive; traverses array right to left
  535       go !z !p !q | p == q    = return z
  536                   | otherwise = do x <- peek p
  537                                    go (k x z) (p `plusPtr` (-1)) q
  538 {-# INLINE foldr' #-}
  539 
  540 -- | 'foldl1' is a variant of 'foldl' that has no starting value
  541 -- argument, and thus must be applied to non-empty 'ByteStrings'.
  542 -- An exception will be thrown in the case of an empty ByteString.
  543 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  544 foldl1 f ps
  545     | null ps   = errorEmptyList "foldl1"
  546     | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
  547 {-# INLINE foldl1 #-}
  548 
  549 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
  550 -- An exception will be thrown in the case of an empty ByteString.
  551 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  552 foldl1' f ps
  553     | null ps   = errorEmptyList "foldl1'"
  554     | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps)
  555 {-# INLINE foldl1' #-}
  556 
  557 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
  558 -- and thus must be applied to non-empty 'ByteString's
  559 -- An exception will be thrown in the case of an empty ByteString.
  560 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  561 foldr1 f ps
  562     | null ps        = errorEmptyList "foldr1"
  563     | otherwise      = foldr f (unsafeLast ps) (unsafeInit ps)
  564 {-# INLINE foldr1 #-}
  565 
  566 -- | 'foldr1\'' is a variant of 'foldr1', but is strict in the
  567 -- accumulator.
  568 foldr1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  569 foldr1' f ps
  570     | null ps        = errorEmptyList "foldr1"
  571     | otherwise      = foldr' f (unsafeLast ps) (unsafeInit ps)
  572 {-# INLINE foldr1' #-}
  573 
  574 -- ---------------------------------------------------------------------
  575 -- Special folds
  576 
  577 -- | /O(n)/ Concatenate a list of ByteStrings.
  578 concat :: [ByteString] -> ByteString
  579 concat = mconcat
  580 
  581 -- | Map a function over a 'ByteString' and concatenate the results
  582 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
  583 concatMap f = concat . foldr ((:) . f) []
  584 
  585 -- foldr (append . f) empty
  586 
  587 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
  588 -- any element of the 'ByteString' satisfies the predicate.
  589 any :: (Word8 -> Bool) -> ByteString -> Bool
  590 any _ (PS _ _ 0) = False
  591 any f (PS x s l) = accursedUnutterablePerformIO $ withForeignPtr x $ \ptr ->
  592         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
  593     where
  594         go !p !q | p == q    = return False
  595                  | otherwise = do c <- peek p
  596                                   if f c then return True
  597                                          else go (p `plusPtr` 1) q
  598 {-# INLINE any #-}
  599 
  600 -- todo fuse
  601 
  602 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
  603 -- if all elements of the 'ByteString' satisfy the predicate.
  604 all :: (Word8 -> Bool) -> ByteString -> Bool
  605 all _ (PS _ _ 0) = True
  606 all f (PS x s l) = accursedUnutterablePerformIO $ withForeignPtr x $ \ptr ->
  607         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
  608     where
  609         go !p !q | p == q     = return True  -- end of list
  610                  | otherwise  = do c <- peek p
  611                                    if f c
  612                                       then go (p `plusPtr` 1) q
  613                                       else return False
  614 {-# INLINE all #-}
  615 
  616 ------------------------------------------------------------------------
  617 
  618 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
  619 -- This function will fuse.
  620 -- An exception will be thrown in the case of an empty ByteString.
  621 maximum :: ByteString -> Word8
  622 maximum xs@(PS x s l)
  623     | null xs   = errorEmptyList "maximum"
  624     | otherwise = accursedUnutterablePerformIO $ withForeignPtr x $ \p ->
  625                       c_maximum (p `plusPtr` s) (fromIntegral l)
  626 {-# INLINE maximum #-}
  627 
  628 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
  629 -- This function will fuse.
  630 -- An exception will be thrown in the case of an empty ByteString.
  631 minimum :: ByteString -> Word8
  632 minimum xs@(PS x s l)
  633     | null xs   = errorEmptyList "minimum"
  634     | otherwise = accursedUnutterablePerformIO $ withForeignPtr x $ \p ->
  635                       c_minimum (p `plusPtr` s) (fromIntegral l)
  636 {-# INLINE minimum #-}
  637 
  638 ------------------------------------------------------------------------
  639 
  640 -- | The 'mapAccumL' function behaves like a combination of 'map' and
  641 -- 'foldl'; it applies a function to each element of a ByteString,
  642 -- passing an accumulating parameter from left to right, and returning a
  643 -- final value of this accumulator together with the new list.
  644 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  645 mapAccumL f acc (PS fp o len) = unsafeDupablePerformIO $ withForeignPtr fp $ \a -> do
  646     gp   <- mallocByteString len
  647     acc' <- withForeignPtr gp $ \p -> mapAccumL_ acc 0 (a `plusPtr` o) p
  648     return $! (acc', PS gp 0 len)
  649   where
  650     mapAccumL_ !s !n !p1 !p2
  651        | n >= len = return s
  652        | otherwise = do
  653             x <- peekByteOff p1 n
  654             let (s', y) = f s x
  655             pokeByteOff p2 n y
  656             mapAccumL_ s' (n+1) p1 p2
  657 {-# INLINE mapAccumL #-}
  658 
  659 -- | The 'mapAccumR' function behaves like a combination of 'map' and
  660 -- 'foldr'; it applies a function to each element of a ByteString,
  661 -- passing an accumulating parameter from right to left, and returning a
  662 -- final value of this accumulator together with the new ByteString.
  663 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  664 mapAccumR f acc (PS fp o len) = unsafeDupablePerformIO $ withForeignPtr fp $ \a -> do
  665     gp   <- mallocByteString len
  666     acc' <- withForeignPtr gp $ \p -> mapAccumR_ acc (len-1) (a `plusPtr` o) p
  667     return $! (acc', PS gp 0 len)
  668   where
  669     mapAccumR_ !s !n !p !q
  670        | n <  0    = return s
  671        | otherwise = do
  672             x  <- peekByteOff p n
  673             let (s', y) = f s x
  674             pokeByteOff q n y
  675             mapAccumR_ s' (n-1) p q
  676 {-# INLINE mapAccumR #-}
  677 
  678 -- ---------------------------------------------------------------------
  679 -- Building ByteStrings
  680 
  681 -- | 'scanl' is similar to 'foldl', but returns a list of successive
  682 -- reduced values from the left. This function will fuse.
  683 --
  684 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
  685 --
  686 -- Note that
  687 --
  688 -- > last (scanl f z xs) == foldl f z xs.
  689 --
  690 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  691 
  692 scanl f v (PS fp s len) = unsafeDupablePerformIO $ withForeignPtr fp $ \a ->
  693     create (len+1) $ \q -> do
  694         poke q v
  695         scanl_ v 0 (a `plusPtr` s) (q `plusPtr` 1)
  696   where
  697     scanl_ !z !n !p !q
  698         | n >= len  = return ()
  699         | otherwise = do
  700             x <- peekByteOff p n
  701             let z' = f z x
  702             pokeByteOff q n z'
  703             scanl_ z' (n+1) p q
  704 {-# INLINE scanl #-}
  705 
  706     -- n.b. haskell's List scan returns a list one bigger than the
  707     -- input, so we need to snoc here to get some extra space, however,
  708     -- it breaks map/up fusion (i.e. scanl . map no longer fuses)
  709 
  710 -- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
  711 -- This function will fuse.
  712 --
  713 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
  714 scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  715 scanl1 f ps
  716     | null ps   = empty
  717     | otherwise = scanl f (unsafeHead ps) (unsafeTail ps)
  718 {-# INLINE scanl1 #-}
  719 
  720 -- | scanr is the right-to-left dual of scanl.
  721 scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  722 scanr f v (PS fp s len) = unsafeDupablePerformIO $ withForeignPtr fp $ \a ->
  723     create (len+1) $ \q -> do
  724         poke (q `plusPtr` len) v
  725         scanr_ v (len-1) (a `plusPtr` s) q
  726   where
  727     scanr_ !z !n !p !q
  728         | n <  0    = return ()
  729         | otherwise = do
  730             x <- peekByteOff p n
  731             let z' = f x z
  732             pokeByteOff q n z'
  733             scanr_ z' (n-1) p q
  734 {-# INLINE scanr #-}
  735 
  736 -- | 'scanr1' is a variant of 'scanr' that has no starting value argument.
  737 scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  738 scanr1 f ps
  739     | null ps   = empty
  740     | otherwise = scanr f (unsafeLast ps) (unsafeInit ps)
  741 {-# INLINE scanr1 #-}
  742 
  743 -- ---------------------------------------------------------------------
  744 -- Unfolds and replicates
  745 
  746 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
  747 -- the value of every element. The following holds:
  748 --
  749 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
  750 --
  751 -- This implemenation uses @memset(3)@
  752 replicate :: Int -> Word8 -> ByteString
  753 replicate w c
  754     | w <= 0    = empty
  755     | otherwise = unsafeCreate w $ \ptr ->
  756                       memset ptr c (fromIntegral w) >> return ()
  757 
  758 -- | /O(n)/, where /n/ is the length of the result.  The 'unfoldr'
  759 -- function is analogous to the List \'unfoldr\'.  'unfoldr' builds a
  760 -- ByteString from a seed value.  The function takes the element and
  761 -- returns 'Nothing' if it is done producing the ByteString or returns
  762 -- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string,
  763 -- and @b@ is the seed value for further production.
  764 --
  765 -- Examples:
  766 --
  767 -- >    unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0
  768 -- > == pack [0, 1, 2, 3, 4, 5]
  769 --
  770 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
  771 unfoldr f = concat . unfoldChunk 32 64
  772   where unfoldChunk n n' x =
  773           case unfoldrN n f x of
  774             (s, Nothing) -> s : []
  775             (s, Just x') -> s : unfoldChunk n' (n+n') x'
  776 {-# INLINE unfoldr #-}
  777 
  778 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed
  779 -- value.  However, the length of the result is limited by the first
  780 -- argument to 'unfoldrN'.  This function is more efficient than 'unfoldr'
  781 -- when the maximum length of the result is known.
  782 --
  783 -- The following equation relates 'unfoldrN' and 'unfoldr':
  784 --
  785 -- > fst (unfoldrN n f s) == take n (unfoldr f s)
  786 --
  787 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
  788 unfoldrN i f x0
  789     | i < 0     = (empty, Just x0)
  790     | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0
  791   where
  792     go !p !x !n
  793       | n == i    = return (0, n, Just x)
  794       | otherwise = case f x of
  795                       Nothing     -> return (0, n, Nothing)
  796                       Just (w,x') -> do poke p w
  797                                         go (p `plusPtr` 1) x' (n+1)
  798 {-# INLINE unfoldrN #-}
  799 
  800 -- ---------------------------------------------------------------------
  801 -- Substrings
  802 
  803 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
  804 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
  805 take :: Int -> ByteString -> ByteString
  806 take n ps@(PS x s l)
  807     | n <= 0    = empty
  808     | n >= l    = ps
  809     | otherwise = PS x s n
  810 {-# INLINE take #-}
  811 
  812 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
  813 -- elements, or @[]@ if @n > 'length' xs@.
  814 drop  :: Int -> ByteString -> ByteString
  815 drop n ps@(PS x s l)
  816     | n <= 0    = ps
  817     | n >= l    = empty
  818     | otherwise = PS x (s+n) (l-n)
  819 {-# INLINE drop #-}
  820 
  821 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
  822 splitAt :: Int -> ByteString -> (ByteString, ByteString)
  823 splitAt n ps@(PS x s l)
  824     | n <= 0    = (empty, ps)
  825     | n >= l    = (ps, empty)
  826     | otherwise = (PS x s n, PS x (s+n) (l-n))
  827 {-# INLINE splitAt #-}
  828 
  829 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
  830 -- returns the longest prefix (possibly empty) of @xs@ of elements that
  831 -- satisfy @p@.
  832 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
  833 takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps
  834 {-# INLINE takeWhile #-}
  835 
  836 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
  837 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
  838 dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps
  839 {-# INLINE dropWhile #-}
  840 
  841 -- instead of findIndexOrEnd, we could use memchr here.
  842 
  843 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
  844 --
  845 -- Under GHC, a rewrite rule will transform break (==) into a
  846 -- call to the specialised breakByte:
  847 --
  848 -- > break ((==) x) = breakByte x
  849 -- > break (==x) = breakByte x
  850 --
  851 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  852 break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps)
  853 {-# INLINE [1] break #-}
  854 
  855 -- See bytestring #70
  856 #if MIN_VERSION_base(4,9,0)
  857 {-# RULES
  858 "ByteString specialise break (x ==)" forall x.
  859     break (x `eqWord8`) = breakByte x
  860 "ByteString specialise break (== x)" forall x.
  861     break (`eqWord8` x) = breakByte x
  862   #-}
  863 #else
  864 {-# RULES
  865 "ByteString specialise break (x ==)" forall x.
  866     break (x ==) = breakByte x
  867 "ByteString specialise break (== x)" forall x.
  868     break (== x) = breakByte x
  869   #-}
  870 #endif
  871 
  872 -- INTERNAL:
  873 
  874 -- | 'breakByte' breaks its ByteString argument at the first occurence
  875 -- of the specified byte. It is more efficient than 'break' as it is
  876 -- implemented with @memchr(3)@. I.e.
  877 --
  878 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
  879 --
  880 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
  881 breakByte c p = case elemIndex c p of
  882     Nothing -> (p,empty)
  883     Just n  -> (unsafeTake n p, unsafeDrop n p)
  884 {-# INLINE breakByte #-}
  885 {-# DEPRECATED breakByte "It is an internal function and should never have been exported. Use 'break (== x)' instead. (There are rewrite rules that handle this special case of 'break'.)" #-}
  886 
  887 -- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString'
  888 --
  889 -- breakEnd p == spanEnd (not.p)
  890 breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  891 breakEnd  p ps = splitAt (findFromEndUntil p ps) ps
  892 
  893 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
  894 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
  895 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  896 span p ps = break (not . p) ps
  897 {-# INLINE [1] span #-}
  898 
  899 -- | 'spanByte' breaks its ByteString argument at the first
  900 -- occurence of a byte other than its argument. It is more efficient
  901 -- than 'span (==)'
  902 --
  903 -- > span  (=='c') "abcd" == spanByte 'c' "abcd"
  904 --
  905 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
  906 spanByte c ps@(PS x s l) =
  907     accursedUnutterablePerformIO $
  908       withForeignPtr x $ \p ->
  909         go (p `plusPtr` s) 0
  910   where
  911     go !p !i | i >= l    = return (ps, empty)
  912              | otherwise = do c' <- peekByteOff p i
  913                               if c /= c'
  914                                   then return (unsafeTake i ps, unsafeDrop i ps)
  915                                   else go p (i+1)
  916 {-# INLINE spanByte #-}
  917 
  918 -- See bytestring #70
  919 #if MIN_VERSION_base(4,9,0)
  920 {-# RULES
  921 "ByteString specialise span (x ==)" forall x.
  922     span (x `eqWord8`) = spanByte x
  923 "ByteString specialise span (== x)" forall x.
  924     span (`eqWord8` x) = spanByte x
  925   #-}
  926 #else
  927 {-# RULES
  928 "ByteString specialise span (x ==)" forall x.
  929     span (x ==) = spanByte x
  930 "ByteString specialise span (== x)" forall x.
  931     span (== x) = spanByte x
  932   #-}
  933 #endif
  934 
  935 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
  936 -- We have
  937 --
  938 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
  939 --
  940 -- and
  941 --
  942 -- > spanEnd (not . isSpace) ps
  943 -- >    ==
  944 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
  945 --
  946 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  947 spanEnd  p ps = splitAt (findFromEndUntil (not.p) ps) ps
  948 
  949 -- | /O(n)/ Splits a 'ByteString' into components delimited by
  950 -- separators, where the predicate returns True for a separator element.
  951 -- The resulting components do not contain the separators.  Two adjacent
  952 -- separators result in an empty component in the output.  eg.
  953 --
  954 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
  955 -- > splitWith (=='a') []        == []
  956 --
  957 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
  958 splitWith _pred (PS _  _   0) = []
  959 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
  960   where pred# c# = pred_ (W8# c#)
  961 
  962         splitWith0 !pred' !off' !len' !fp' =
  963           accursedUnutterablePerformIO $
  964             withForeignPtr fp $ \p ->
  965               splitLoop pred' p 0 off' len' fp'
  966 
  967         splitLoop :: (Word# -> Bool)
  968                   -> Ptr Word8
  969                   -> Int -> Int -> Int
  970                   -> ForeignPtr Word8
  971                   -> IO [ByteString]
  972 
  973         splitLoop pred' p idx' off' len' fp'
  974             | idx' >= len'  = return [PS fp' off' idx']
  975             | otherwise = do
  976                 w <- peekElemOff p (off'+idx')
  977                 if pred' (case w of W8# w# -> w#)
  978                    then return (PS fp' off' idx' :
  979                               splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
  980                    else splitLoop pred' p (idx'+1) off' len' fp'
  981 {-# INLINE splitWith #-}
  982 
  983 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
  984 -- argument, consuming the delimiter. I.e.
  985 --
  986 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
  987 -- > split 'a'  "aXaXaXa"    == ["","X","X","X",""]
  988 -- > split 'x'  "x"          == ["",""]
  989 --
  990 -- and
  991 --
  992 -- > intercalate [c] . split c == id
  993 -- > split == splitWith . (==)
  994 --
  995 -- As for all splitting functions in this library, this function does
  996 -- not copy the substrings, it just constructs new 'ByteStrings' that
  997 -- are slices of the original.
  998 --
  999 split :: Word8 -> ByteString -> [ByteString]
 1000 split _ (PS _ _ 0) = []
 1001 split w (PS x s l) = loop 0
 1002     where
 1003         loop !n =
 1004             let q = accursedUnutterablePerformIO $ withForeignPtr x $ \p ->
 1005                       memchr (p `plusPtr` (s+n))
 1006                              w (fromIntegral (l-n))
 1007             in if q == nullPtr
 1008                 then [PS x (s+n) (l-n)]
 1009                 else let i = accursedUnutterablePerformIO $ withForeignPtr x $ \p ->
 1010                                return (q `minusPtr` (p `plusPtr` s))
 1011                       in PS x (s+n) (i-n) : loop (i+1)
 1012 
 1013 {-# INLINE split #-}
 1014 
 1015 
 1016 -- | The 'group' function takes a ByteString and returns a list of
 1017 -- ByteStrings such that the concatenation of the result is equal to the
 1018 -- argument.  Moreover, each sublist in the result contains only equal
 1019 -- elements.  For example,
 1020 --
 1021 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
 1022 --
 1023 -- It is a special case of 'groupBy', which allows the programmer to
 1024 -- supply their own equality test. It is about 40% faster than
 1025 -- /groupBy (==)/
 1026 group :: ByteString -> [ByteString]
 1027 group xs
 1028     | null xs   = []
 1029     | otherwise = ys : group zs
 1030     where
 1031         (ys, zs) = spanByte (unsafeHead xs) xs
 1032 
 1033 -- | The 'groupBy' function is the non-overloaded version of 'group'.
 1034 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 1035 groupBy k xs
 1036     | null xs   = []
 1037     | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs)
 1038     where
 1039         n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
 1040 
 1041 -- | /O(n)/ The 'intercalate' function takes a 'ByteString' and a list of
 1042 -- 'ByteString's and concatenates the list after interspersing the first
 1043 -- argument between each element of the list.
 1044 intercalate :: ByteString -> [ByteString] -> ByteString
 1045 intercalate s = concat . (List.intersperse s)
 1046 {-# INLINE [1] intercalate #-}
 1047 
 1048 {-# RULES
 1049 "ByteString specialise intercalate c -> intercalateByte" forall c s1 s2 .
 1050     intercalate (singleton c) (s1 : s2 : []) = intercalateWithByte c s1 s2
 1051   #-}
 1052 
 1053 -- | /O(n)/ intercalateWithByte. An efficient way to join to two ByteStrings
 1054 -- with a char. Around 4 times faster than the generalised join.
 1055 --
 1056 intercalateWithByte :: Word8 -> ByteString -> ByteString -> ByteString
 1057 intercalateWithByte c f@(PS ffp s l) g@(PS fgp t m) = unsafeCreate len $ \ptr ->
 1058     withForeignPtr ffp $ \fp ->
 1059     withForeignPtr fgp $ \gp -> do
 1060         memcpy ptr (fp `plusPtr` s) (fromIntegral l)
 1061         poke (ptr `plusPtr` l) c
 1062         memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
 1063     where
 1064       len = length f + length g + 1
 1065 {-# INLINE intercalateWithByte #-}
 1066 
 1067 -- ---------------------------------------------------------------------
 1068 -- Indexing ByteStrings
 1069 
 1070 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
 1071 index :: ByteString -> Int -> Word8
 1072 index ps n
 1073     | n < 0          = moduleError "index" ("negative index: " ++ show n)
 1074     | n >= length ps = moduleError "index" ("index too large: " ++ show n
 1075                                          ++ ", length = " ++ show (length ps))
 1076     | otherwise      = ps `unsafeIndex` n
 1077 {-# INLINE index #-}
 1078 
 1079 -- | /O(n)/ The 'elemIndex' function returns the index of the first
 1080 -- element in the given 'ByteString' which is equal to the query
 1081 -- element, or 'Nothing' if there is no such element.
 1082 -- This implementation uses memchr(3).
 1083 elemIndex :: Word8 -> ByteString -> Maybe Int
 1084 elemIndex c (PS x s l) = accursedUnutterablePerformIO $ withForeignPtr x $ \p -> do
 1085     let p' = p `plusPtr` s
 1086     q <- memchr p' c (fromIntegral l)
 1087     return $! if q == nullPtr then Nothing else Just $! q `minusPtr` p'
 1088 {-# INLINE elemIndex #-}
 1089 
 1090 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
 1091 -- element in the given 'ByteString' which is equal to the query
 1092 -- element, or 'Nothing' if there is no such element. The following
 1093 -- holds:
 1094 --
 1095 -- > elemIndexEnd c xs ==
 1096 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
 1097 --
 1098 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
 1099 elemIndexEnd ch (PS x s l) = accursedUnutterablePerformIO $ withForeignPtr x $ \p ->
 1100     go (p `plusPtr` s) (l-1)
 1101   where
 1102     go !p !i | i < 0     = return Nothing
 1103              | otherwise = do ch' <- peekByteOff p i
 1104                               if ch == ch'
 1105                                   then return $ Just i
 1106                                   else go p (i-1)
 1107 {-# INLINE elemIndexEnd #-}
 1108 
 1109 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
 1110 -- the indices of all elements equal to the query element, in ascending order.
 1111 -- This implementation uses memchr(3).
 1112 elemIndices :: Word8 -> ByteString -> [Int]
 1113 elemIndices w (PS x s l) = loop 0
 1114     where
 1115         loop !n = let q = accursedUnutterablePerformIO $ withForeignPtr x $ \p ->
 1116                            memchr (p `plusPtr` (n+s))
 1117                                                 w (fromIntegral (l - n))
 1118                   in if q == nullPtr
 1119                         then []
 1120                         else let i = accursedUnutterablePerformIO $ withForeignPtr x $ \p ->
 1121                                        return (q `minusPtr` (p `plusPtr` s))
 1122                              in i : loop (i+1)
 1123 {-# INLINE elemIndices #-}
 1124 
 1125 -- | count returns the number of times its argument appears in the ByteString
 1126 --
 1127 -- > count = length . elemIndices
 1128 --
 1129 -- But more efficiently than using length on the intermediate list.
 1130 count :: Word8 -> ByteString -> Int
 1131 count w (PS x s m) = accursedUnutterablePerformIO $ withForeignPtr x $ \p ->
 1132     fmap fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
 1133 {-# INLINE count #-}
 1134 
 1135 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
 1136 -- returns the index of the first element in the ByteString
 1137 -- satisfying the predicate.
 1138 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
 1139 findIndex k (PS x s l) = accursedUnutterablePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
 1140   where
 1141     go !ptr !n | n >= l    = return Nothing
 1142                | otherwise = do w <- peek ptr
 1143                                 if k w
 1144                                   then return (Just n)
 1145                                   else go (ptr `plusPtr` 1) (n+1)
 1146 {-# INLINE findIndex #-}
 1147 
 1148 -- | The 'findIndices' function extends 'findIndex', by returning the
 1149 -- indices of all elements satisfying the predicate, in ascending order.
 1150 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
 1151 findIndices p ps = loop 0 ps
 1152    where
 1153      loop !n !qs | null qs           = []
 1154                  | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
 1155                  | otherwise         =     loop (n+1) (unsafeTail qs)
 1156 
 1157 -- ---------------------------------------------------------------------
 1158 -- Searching ByteStrings
 1159 
 1160 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
 1161 elem :: Word8 -> ByteString -> Bool
 1162 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
 1163 {-# INLINE elem #-}
 1164 
 1165 -- | /O(n)/ 'notElem' is the inverse of 'elem'
 1166 notElem :: Word8 -> ByteString -> Bool
 1167 notElem c ps = not (elem c ps)
 1168 {-# INLINE notElem #-}
 1169 
 1170 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
 1171 -- returns a ByteString containing those characters that satisfy the
 1172 -- predicate.
 1173 filter :: (Word8 -> Bool) -> ByteString -> ByteString
 1174 filter k ps@(PS x s l)
 1175     | null ps   = ps
 1176     | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> do
 1177         t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
 1178         return $! t `minusPtr` p -- actual length
 1179     where
 1180         go !f !t !end | f == end  = return t
 1181                       | otherwise = do
 1182                           w <- peek f
 1183                           if k w
 1184                             then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
 1185                             else             go (f `plusPtr` 1) t               end
 1186 {-# INLINE filter #-}
 1187 
 1188 {-
 1189 --
 1190 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
 1191 -- case of filtering a single byte. It is more efficient to use
 1192 -- /filterByte/ in this case.
 1193 --
 1194 -- > filterByte == filter . (==)
 1195 --
 1196 -- filterByte is around 10x faster, and uses much less space, than its
 1197 -- filter equivalent
 1198 --
 1199 filterByte :: Word8 -> ByteString -> ByteString
 1200 filterByte w ps = replicate (count w ps) w
 1201 {-# INLINE filterByte #-}
 1202 
 1203 {-# RULES
 1204 "ByteString specialise filter (== x)" forall x.
 1205     filter ((==) x) = filterByte x
 1206 "ByteString specialise filter (== x)" forall x.
 1207     filter (== x) = filterByte x
 1208   #-}
 1209 -}
 1210 
 1211 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
 1212 -- and returns the first element in matching the predicate, or 'Nothing'
 1213 -- if there is no such element.
 1214 --
 1215 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
 1216 --
 1217 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 1218 find f p = case findIndex f p of
 1219                     Just n -> Just (p `unsafeIndex` n)
 1220                     _      -> Nothing
 1221 {-# INLINE find #-}
 1222 
 1223 -- | /O(n)/ The 'partition' function takes a predicate a ByteString and returns
 1224 -- the pair of ByteStrings with elements which do and do not satisfy the
 1225 -- predicate, respectively; i.e.,
 1226 --
 1227 -- > partition p bs == (filter p xs, filter (not . p) xs)
 1228 --
 1229 partition :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 1230 partition f s = unsafeDupablePerformIO $
 1231     do fp' <- mallocByteString len
 1232        withForeignPtr fp' $ \p ->
 1233            do let end = p `plusPtr` (len - 1)
 1234               mid <- sep 0 p end
 1235               rev mid end
 1236               let i = mid `minusPtr` p
 1237               return (PS fp' 0 i,
 1238                       PS fp' i (len - i))
 1239   where
 1240     len  = length s
 1241     incr = (`plusPtr` 1)
 1242     decr = (`plusPtr` (-1))
 1243 
 1244     sep !i !p1 !p2
 1245        | i == len  = return p1
 1246        | f w       = do poke p1 w
 1247                         sep (i + 1) (incr p1) p2
 1248        | otherwise = do poke p2 w
 1249                         sep (i + 1) p1 (decr p2)
 1250       where
 1251         w = s `unsafeIndex` i
 1252 
 1253     rev !p1 !p2
 1254       | p1 >= p2  = return ()
 1255       | otherwise = do a <- peek p1
 1256                        b <- peek p2
 1257                        poke p1 b
 1258                        poke p2 a
 1259                        rev (incr p1) (decr p2)
 1260 
 1261 -- --------------------------------------------------------------------
 1262 -- Sarching for substrings
 1263 
 1264 -- |/O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
 1265 -- if the first is a prefix of the second.
 1266 isPrefixOf :: ByteString -> ByteString -> Bool
 1267 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
 1268     | l1 == 0   = True
 1269     | l2 < l1   = False
 1270     | otherwise = accursedUnutterablePerformIO $ withForeignPtr x1 $ \p1 ->
 1271         withForeignPtr x2 $ \p2 -> do
 1272             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
 1273             return $! i == 0
 1274 
 1275 -- | /O(n)/ The 'stripPrefix' function takes two ByteStrings and returns 'Just'
 1276 -- the remainder of the second iff the first is its prefix, and otherwise
 1277 -- 'Nothing'.
 1278 stripPrefix :: ByteString -> ByteString -> Maybe ByteString
 1279 stripPrefix bs1@(PS _ _ l1) bs2
 1280    | bs1 `isPrefixOf` bs2 = Just (unsafeDrop l1 bs2)
 1281    | otherwise = Nothing
 1282 
 1283 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
 1284 -- iff the first is a suffix of the second.
 1285 --
 1286 -- The following holds:
 1287 --
 1288 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
 1289 --
 1290 -- However, the real implemenation uses memcmp to compare the end of the
 1291 -- string only, with no reverse required..
 1292 isSuffixOf :: ByteString -> ByteString -> Bool
 1293 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
 1294     | l1 == 0   = True
 1295     | l2 < l1   = False
 1296     | otherwise = accursedUnutterablePerformIO $ withForeignPtr x1 $ \p1 ->
 1297         withForeignPtr x2 $ \p2 -> do
 1298             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
 1299             return $! i == 0
 1300 
 1301 -- | /O(n)/ The 'stripSuffix' function takes two ByteStrings and returns 'Just'
 1302 -- the remainder of the second iff the first is its suffix, and otherwise
 1303 -- 'Nothing'.
 1304 stripSuffix :: ByteString -> ByteString -> Maybe ByteString
 1305 stripSuffix bs1@(PS _ _ l1) bs2@(PS _ _ l2)
 1306    | bs1 `isSuffixOf` bs2 = Just (unsafeTake (l2 - l1) bs2)
 1307    | otherwise = Nothing
 1308 
 1309 -- | Check whether one string is a substring of another. @isInfixOf
 1310 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
 1311 isInfixOf :: ByteString -> ByteString -> Bool
 1312 isInfixOf p s = isJust (findSubstring p s)
 1313 
 1314 -- | Break a string on a substring, returning a pair of the part of the
 1315 -- string prior to the match, and the rest of the string.
 1316 --
 1317 -- The following relationships hold:
 1318 --
 1319 -- > break (== c) l == breakSubstring (singleton c) l
 1320 --
 1321 -- and:
 1322 --
 1323 -- > findSubstring s l ==
 1324 -- >    if null s then Just 0
 1325 -- >              else case breakSubstring s l of
 1326 -- >                       (x,y) | null y    -> Nothing
 1327 -- >                             | otherwise -> Just (length x)
 1328 --
 1329 -- For example, to tokenise a string, dropping delimiters:
 1330 --
 1331 -- > tokenise x y = h : if null t then [] else tokenise x (drop (length x) t)
 1332 -- >     where (h,t) = breakSubstring x y
 1333 --
 1334 -- To skip to the first occurence of a string:
 1335 --
 1336 -- > snd (breakSubstring x y)
 1337 --
 1338 -- To take the parts of a string before a delimiter:
 1339 --
 1340 -- > fst (breakSubstring x y)
 1341 --
 1342 -- Note that calling `breakSubstring x` does some preprocessing work, so
 1343 -- you should avoid unnecessarily duplicating breakSubstring calls with the same
 1344 -- pattern.
 1345 --
 1346 breakSubstring :: ByteString -- ^ String to search for
 1347                -> ByteString -- ^ String to search in
 1348                -> (ByteString,ByteString) -- ^ Head and tail of string broken at substring
 1349 breakSubstring pat =
 1350   case lp of
 1351     0 -> \src -> (empty,src)
 1352     1 -> breakByte (unsafeHead pat)
 1353     _ -> if lp * 8 <= finiteBitSize (0 :: Word)
 1354              then shift
 1355              else karpRabin
 1356   where
 1357     unsafeSplitAt i s = (unsafeTake i s, unsafeDrop i s)
 1358     lp                = length pat
 1359     karpRabin :: ByteString -> (ByteString, ByteString)
 1360     karpRabin src
 1361         | length src < lp = (src,empty)
 1362         | otherwise = search (rollingHash $ unsafeTake lp src) lp
 1363       where
 1364         k           = 2891336453 :: Word32
 1365         rollingHash = foldl' (\h b -> h * k + fromIntegral b) 0
 1366         hp          = rollingHash pat
 1367         m           = k ^ lp
 1368         get = fromIntegral . unsafeIndex src
 1369         search !hs !i
 1370             | hp == hs && pat == unsafeTake lp b = u
 1371             | length src <= i                    = (src,empty) -- not found
 1372             | otherwise                          = search hs' (i + 1)
 1373           where
 1374             u@(_, b) = unsafeSplitAt (i - lp) src
 1375             hs' = hs * k +
 1376                   get i -
 1377                   m * get (i - lp)
 1378     {-# INLINE karpRabin #-}
 1379 
 1380     shift :: ByteString -> (ByteString, ByteString)
 1381     shift !src
 1382         | length src < lp = (src,empty)
 1383         | otherwise       = search (intoWord $ unsafeTake lp src) lp
 1384       where
 1385         intoWord :: ByteString -> Word
 1386         intoWord = foldl' (\w b -> (w `shiftL` 8) .|. fromIntegral b) 0
 1387         wp   = intoWord pat
 1388         mask = (1 `shiftL` (8 * lp)) - 1
 1389         search !w !i
 1390             | w == wp         = unsafeSplitAt (i - lp) src
 1391             | length src <= i = (src, empty)
 1392             | otherwise       = search w' (i + 1)
 1393           where
 1394             b  = fromIntegral (unsafeIndex src i)
 1395             w' = mask .&. ((w `shiftL` 8) .|. b)
 1396     {-# INLINE shift #-}
 1397 
 1398 -- | Get the first index of a substring in another string,
 1399 --   or 'Nothing' if the string is not found.
 1400 --   @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
 1401 findSubstring :: ByteString -- ^ String to search for.
 1402               -> ByteString -- ^ String to seach in.
 1403               -> Maybe Int
 1404 findSubstring pat src
 1405     | null pat && null src = Just 0
 1406     | null b = Nothing
 1407     | otherwise = Just (length a)
 1408   where (a, b) = breakSubstring pat src
 1409 
 1410 {-# DEPRECATED findSubstring "findSubstring is deprecated in favour of breakSubstring." #-}
 1411 
 1412 -- | Find the indexes of all (possibly overlapping) occurances of a
 1413 -- substring in a string.
 1414 --
 1415 findSubstrings :: ByteString -- ^ String to search for.
 1416                -> ByteString -- ^ String to seach in.
 1417                -> [Int]
 1418 findSubstrings pat src
 1419     | null pat        = [0 .. ls]
 1420     | otherwise       = search 0
 1421   where
 1422     lp = length pat
 1423     ls = length src
 1424     search !n
 1425         | (n > ls - lp) || null b = []
 1426         | otherwise = let k = n + length a
 1427                       in  k : search (k + lp)
 1428       where
 1429         (a, b) = breakSubstring pat (unsafeDrop n src)
 1430 
 1431 {-# DEPRECATED findSubstrings "findSubstrings is deprecated in favour of breakSubstring." #-}
 1432 
 1433 -- ---------------------------------------------------------------------
 1434 -- Zipping
 1435 
 1436 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
 1437 -- corresponding pairs of bytes. If one input ByteString is short,
 1438 -- excess elements of the longer ByteString are discarded. This is
 1439 -- equivalent to a pair of 'unpack' operations.
 1440 zip :: ByteString -> ByteString -> [(Word8,Word8)]
 1441 zip ps qs
 1442     | null ps || null qs = []
 1443     | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
 1444 
 1445 -- | 'zipWith' generalises 'zip' by zipping with the function given as
 1446 -- the first argument, instead of a tupling function.  For example,
 1447 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
 1448 -- corresponding sums.
 1449 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
 1450 zipWith f ps qs
 1451     | null ps || null qs = []
 1452     | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
 1453 {-# NOINLINE [1] zipWith #-}
 1454 
 1455 --
 1456 -- | A specialised version of zipWith for the common case of a
 1457 -- simultaneous map over two bytestrings, to build a 3rd. Rewrite rules
 1458 -- are used to automatically covert zipWith into zipWith' when a pack is
 1459 -- performed on the result of zipWith.
 1460 --
 1461 zipWith' :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString
 1462 zipWith' f (PS fp s l) (PS fq t m) = unsafeDupablePerformIO $
 1463     withForeignPtr fp $ \a ->
 1464     withForeignPtr fq $ \b ->
 1465     create len $ zipWith_ 0 (a `plusPtr` s) (b `plusPtr` t)
 1466   where
 1467     zipWith_ :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
 1468     zipWith_ !n !p1 !p2 !r
 1469        | n >= len = return ()
 1470        | otherwise = do
 1471             x <- peekByteOff p1 n
 1472             y <- peekByteOff p2 n
 1473             pokeByteOff r n (f x y)
 1474             zipWith_ (n+1) p1 p2 r
 1475 
 1476     len = min l m
 1477 {-# INLINE zipWith' #-}
 1478 
 1479 {-# RULES
 1480 "ByteString specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q .
 1481     zipWith f p q = unpack (zipWith' f p q)
 1482   #-}
 1483 
 1484 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
 1485 -- ByteStrings. Note that this performs two 'pack' operations.
 1486 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
 1487 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
 1488 {-# INLINE unzip #-}
 1489 
 1490 -- ---------------------------------------------------------------------
 1491 -- Special lists
 1492 
 1493 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
 1494 inits :: ByteString -> [ByteString]
 1495 inits (PS x s l) = [PS x s n | n <- [0..l]]
 1496 
 1497 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
 1498 tails :: ByteString -> [ByteString]
 1499 tails p | null p    = [empty]
 1500         | otherwise = p : tails (unsafeTail p)
 1501 
 1502 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
 1503 
 1504 -- ---------------------------------------------------------------------
 1505 -- ** Ordered 'ByteString's
 1506 
 1507 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
 1508 sort :: ByteString -> ByteString
 1509 sort (PS input s l) = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do
 1510 
 1511     _ <- memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
 1512     withForeignPtr input (\x -> countOccurrences arr (x `plusPtr` s) l)
 1513 
 1514     let go 256 !_   = return ()
 1515         go i   !ptr = do n <- peekElemOff arr i
 1516                          when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
 1517                          go (i + 1) (ptr `plusPtr` (fromIntegral n))
 1518     go 0 p
 1519   where
 1520     -- | Count the number of occurrences of each byte.
 1521     -- Used by 'sort'
 1522     --
 1523     countOccurrences :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
 1524     countOccurrences !counts !str !len = go 0
 1525      where
 1526         go !i | i == len    = return ()
 1527               | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
 1528                                x <- peekElemOff counts k
 1529                                pokeElemOff counts k (x + 1)
 1530                                go (i + 1)
 1531 
 1532 
 1533 -- ---------------------------------------------------------------------
 1534 -- Low level constructors
 1535 
 1536 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a
 1537 -- null-terminated @CString@.  The @CString@ is a copy and will be freed
 1538 -- automatically.
 1539 useAsCString :: ByteString -> (CString -> IO a) -> IO a
 1540 useAsCString (PS fp o l) action = do
 1541  allocaBytes (l+1) $ \buf ->
 1542    withForeignPtr fp $ \p -> do
 1543      memcpy buf (p `plusPtr` o) (fromIntegral l)
 1544      pokeByteOff buf l (0::Word8)
 1545      action (castPtr buf)
 1546 
 1547 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
 1548 -- As for @useAsCString@ this function makes a copy of the original @ByteString@.
 1549 useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
 1550 useAsCStringLen p@(PS _ _ l) f = useAsCString p $ \cstr -> f (cstr,l)
 1551 
 1552 ------------------------------------------------------------------------
 1553 
 1554 -- | /O(n)./ Construct a new @ByteString@ from a @CString@. The
 1555 -- resulting @ByteString@ is an immutable copy of the original
 1556 -- @CString@, and is managed on the Haskell heap. The original
 1557 -- @CString@ must be null terminated.
 1558 packCString :: CString -> IO ByteString
 1559 packCString cstr = do
 1560     len <- c_strlen cstr
 1561     packCStringLen (cstr, fromIntegral len)
 1562 
 1563 -- | /O(n)./ Construct a new @ByteString@ from a @CStringLen@. The
 1564 -- resulting @ByteString@ is an immutable copy of the original @CStringLen@.
 1565 -- The @ByteString@ is a normal Haskell value and will be managed on the
 1566 -- Haskell heap.
 1567 packCStringLen :: CStringLen -> IO ByteString
 1568 packCStringLen (cstr, len) | len >= 0 = create len $ \p ->
 1569     memcpy p (castPtr cstr) (fromIntegral len)
 1570 packCStringLen (_, len) =
 1571     moduleErrorIO "packCStringLen" ("negative length: " ++ show len)
 1572 
 1573 ------------------------------------------------------------------------
 1574 
 1575 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
 1576 -- This is mainly useful to allow the rest of the data pointed
 1577 -- to by the 'ByteString' to be garbage collected, for example
 1578 -- if a large string has been read in, and only a small part of it
 1579 -- is needed in the rest of the program.
 1580 --
 1581 copy :: ByteString -> ByteString
 1582 copy (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
 1583     memcpy p (f `plusPtr` s) (fromIntegral l)
 1584 
 1585 -- ---------------------------------------------------------------------
 1586 -- Line IO
 1587 
 1588 -- | Read a line from stdin.
 1589 getLine :: IO ByteString
 1590 getLine = hGetLine stdin
 1591 
 1592 -- | Read a line from a handle
 1593 
 1594 hGetLine :: Handle -> IO ByteString
 1595 hGetLine h =
 1596   wantReadableHandle_ "Data.ByteString.hGetLine" h $
 1597     \ h_@Handle__{haByteBuffer} -> do
 1598       flushCharReadBuffer h_
 1599       buf <- readIORef haByteBuffer
 1600       if isEmptyBuffer buf
 1601          then fill h_ buf 0 []
 1602          else haveBuf h_ buf 0 []
 1603  where
 1604 
 1605   fill h_@Handle__{haByteBuffer,haDevice} buf !len xss = do
 1606     (r,buf') <- Buffered.fillReadBuffer haDevice buf
 1607     if r == 0
 1608        then do writeIORef haByteBuffer buf{ bufR=0, bufL=0 }
 1609                if len > 0
 1610                   then mkBigPS len xss
 1611                   else ioe_EOF
 1612        else haveBuf h_ buf' len xss
 1613 
 1614   haveBuf h_@Handle__{haByteBuffer}
 1615           buf@Buffer{ bufRaw=raw, bufR=w, bufL=r }
 1616           len xss =
 1617     do
 1618         off <- findEOL r w raw
 1619         let new_len = len + off - r
 1620         xs <- mkPS raw r off
 1621 
 1622       -- if eol == True, then off is the offset of the '\n'
 1623       -- otherwise off == w and the buffer is now empty.
 1624         if off /= w
 1625             then do if (w == off + 1)
 1626                             then writeIORef haByteBuffer buf{ bufL=0, bufR=0 }
 1627                             else writeIORef haByteBuffer buf{ bufL = off + 1 }
 1628                     mkBigPS new_len (xs:xss)
 1629             else do
 1630                  fill h_ buf{ bufL=0, bufR=0 } new_len (xs:xss)
 1631 
 1632   -- find the end-of-line character, if there is one
 1633   findEOL r w raw
 1634         | r == w = return w
 1635         | otherwise =  do
 1636             c <- readWord8Buf raw r
 1637             if c == fromIntegral (ord '\n')
 1638                 then return r -- NB. not r+1: don't include the '\n'
 1639                 else findEOL (r+1) w raw
 1640 
 1641 mkPS :: RawBuffer Word8 -> Int -> Int -> IO ByteString
 1642 mkPS buf start end =
 1643  create len $ \p ->
 1644    withRawBuffer buf $ \pbuf -> do
 1645    copyBytes p (pbuf `plusPtr` start) len
 1646  where
 1647    len = end - start
 1648 
 1649 mkBigPS :: Int -> [ByteString] -> IO ByteString
 1650 mkBigPS _ [ps] = return ps
 1651 mkBigPS _ pss = return $! concat (P.reverse pss)
 1652 
 1653 -- ---------------------------------------------------------------------
 1654 -- Block IO
 1655 
 1656 -- | Outputs a 'ByteString' to the specified 'Handle'.
 1657 hPut :: Handle -> ByteString -> IO ()
 1658 hPut _ (PS _  _ 0) = return ()
 1659 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
 1660 
 1661 -- | Similar to 'hPut' except that it will never block. Instead it returns
 1662 -- any tail that did not get written. This tail may be 'empty' in the case that
 1663 -- the whole string was written, or the whole original string if nothing was
 1664 -- written. Partial writes are also possible.
 1665 --
 1666 -- Note: on Windows and with Haskell implementation other than GHC, this
 1667 -- function does not work correctly; it behaves identically to 'hPut'.
 1668 --
 1669 hPutNonBlocking :: Handle -> ByteString -> IO ByteString
 1670 hPutNonBlocking h bs@(PS ps s l) = do
 1671   bytesWritten <- withForeignPtr ps $ \p-> hPutBufNonBlocking h (p `plusPtr` s) l
 1672   return $! drop bytesWritten bs
 1673 
 1674 -- | A synonym for @hPut@, for compatibility
 1675 hPutStr :: Handle -> ByteString -> IO ()
 1676 hPutStr = hPut
 1677 
 1678 -- | Write a ByteString to a handle, appending a newline byte
 1679 hPutStrLn :: Handle -> ByteString -> IO ()
 1680 hPutStrLn h ps
 1681     | length ps < 1024 = hPut h (ps `snoc` 0x0a)
 1682     | otherwise        = hPut h ps >> hPut h (singleton (0x0a)) -- don't copy
 1683 
 1684 -- | Write a ByteString to stdout
 1685 putStr :: ByteString -> IO ()
 1686 putStr = hPut stdout
 1687 
 1688 -- | Write a ByteString to stdout, appending a newline byte
 1689 putStrLn :: ByteString -> IO ()
 1690 putStrLn = hPutStrLn stdout
 1691 
 1692 {-# DEPRECATED hPutStrLn
 1693     "Use Data.ByteString.Char8.hPutStrLn instead. (Functions that rely on ASCII encodings belong in Data.ByteString.Char8)"
 1694   #-}
 1695 {-# DEPRECATED putStrLn
 1696     "Use Data.ByteString.Char8.putStrLn instead. (Functions that rely on ASCII encodings belong in Data.ByteString.Char8)"
 1697   #-}
 1698 
 1699 ------------------------------------------------------------------------
 1700 -- Low level IO
 1701 
 1702 -- | Read a 'ByteString' directly from the specified 'Handle'.  This
 1703 -- is far more efficient than reading the characters into a 'String'
 1704 -- and then using 'pack'. First argument is the Handle to read from,
 1705 -- and the second is the number of bytes to read. It returns the bytes
 1706 -- read, up to n, or 'empty' if EOF has been reached.
 1707 --
 1708 -- 'hGet' is implemented in terms of 'hGetBuf'.
 1709 --
 1710 -- If the handle is a pipe or socket, and the writing end
 1711 -- is closed, 'hGet' will behave as if EOF was reached.
 1712 --
 1713 hGet :: Handle -> Int -> IO ByteString
 1714 hGet h i
 1715     | i >  0    = createAndTrim i $ \p -> hGetBuf h p i
 1716     | i == 0    = return empty
 1717     | otherwise = illegalBufferSize h "hGet" i
 1718 
 1719 -- | hGetNonBlocking is similar to 'hGet', except that it will never block
 1720 -- waiting for data to become available, instead it returns only whatever data
 1721 -- is available.  If there is no data available to be read, 'hGetNonBlocking'
 1722 -- returns 'empty'.
 1723 --
 1724 -- Note: on Windows and with Haskell implementation other than GHC, this
 1725 -- function does not work correctly; it behaves identically to 'hGet'.
 1726 --
 1727 hGetNonBlocking :: Handle -> Int -> IO ByteString
 1728 hGetNonBlocking h i
 1729     | i >  0    = createAndTrim i $ \p -> hGetBufNonBlocking h p i
 1730     | i == 0    = return empty
 1731     | otherwise = illegalBufferSize h "hGetNonBlocking" i
 1732 
 1733 -- | Like 'hGet', except that a shorter 'ByteString' may be returned
 1734 -- if there are not enough bytes immediately available to satisfy the
 1735 -- whole request.  'hGetSome' only blocks if there is no data
 1736 -- available, and EOF has not yet been reached.
 1737 --
 1738 hGetSome :: Handle -> Int -> IO ByteString
 1739 hGetSome hh i
 1740 #if MIN_VERSION_base(4,3,0)
 1741     | i >  0    = createAndTrim i $ \p -> hGetBufSome hh p i
 1742 #else
 1743     | i >  0    = let
 1744                    loop = do
 1745                      s <- hGetNonBlocking hh i
 1746                      if not (null s)
 1747                         then return s
 1748                         else do eof <- hIsEOF hh
 1749                                 if eof then return s
 1750                                        else hWaitForInput hh (-1) >> loop
 1751                                          -- for this to work correctly, the
 1752                                          -- Handle should be in binary mode
 1753                                          -- (see GHC ticket #3808)
 1754                   in loop
 1755 #endif
 1756     | i == 0    = return empty
 1757     | otherwise = illegalBufferSize hh "hGetSome" i
 1758 
 1759 illegalBufferSize :: Handle -> String -> Int -> IO a
 1760 illegalBufferSize handle fn sz =
 1761     ioError (mkIOError illegalOperationErrorType msg (Just handle) Nothing)
 1762     --TODO: System.IO uses InvalidArgument here, but it's not exported :-(
 1763     where
 1764       msg = fn ++ ": illegal ByteString size " ++ showsPrec 9 sz []
 1765 
 1766 
 1767 -- | Read a handle's entire contents strictly into a 'ByteString'.
 1768 --
 1769 -- This function reads chunks at a time, increasing the chunk size on each
 1770 -- read. The final string is then realloced to the appropriate size. For
 1771 -- files > half of available memory, this may lead to memory exhaustion.
 1772 -- Consider using 'readFile' in this case.
 1773 --
 1774 -- The Handle is closed once the contents have been read,
 1775 -- or if an exception is thrown.
 1776 --
 1777 hGetContents :: Handle -> IO ByteString
 1778 hGetContents hnd = do
 1779     bs <- hGetContentsSizeHint hnd 1024 2048
 1780             `finally` hClose hnd
 1781     -- don't waste too much space for small files:
 1782     if length bs < 900
 1783       then return $! copy bs
 1784       else return bs
 1785 
 1786 hGetContentsSizeHint :: Handle
 1787                      -> Int -- ^ first read size
 1788                      -> Int -- ^ initial buffer size increment
 1789                      -> IO ByteString
 1790 hGetContentsSizeHint hnd =
 1791     readChunks []
 1792   where
 1793     readChunks chunks sz sz' = do
 1794       fp        <- mallocByteString sz
 1795       readcount <- withForeignPtr fp $ \buf -> hGetBuf hnd buf sz
 1796       let chunk = PS fp 0 readcount
 1797       -- We rely on the hGetBuf behaviour (not hGetBufSome) where it reads up
 1798       -- to the size we ask for, or EOF. So short reads indicate EOF.
 1799       if readcount < sz && sz > 0
 1800         then return $! concat (P.reverse (chunk : chunks))
 1801         else readChunks (chunk : chunks) sz' ((sz+sz') `min` 32752)
 1802              -- we grow the buffer sizes, but not too huge
 1803              -- we concatenate in the end anyway
 1804 
 1805 -- | getContents. Read stdin strictly. Equivalent to hGetContents stdin
 1806 -- The 'Handle' is closed after the contents have been read.
 1807 --
 1808 getContents :: IO ByteString
 1809 getContents = hGetContents stdin
 1810 
 1811 -- | The interact function takes a function of type @ByteString -> ByteString@
 1812 -- as its argument. The entire input from the standard input device is passed
 1813 -- to this function as its argument, and the resulting string is output on the
 1814 -- standard output device.
 1815 --
 1816 interact :: (ByteString -> ByteString) -> IO ()
 1817 interact transformer = putStr . transformer =<< getContents
 1818 
 1819 -- | Read an entire file strictly into a 'ByteString'.
 1820 --
 1821 readFile :: FilePath -> IO ByteString
 1822 readFile f =
 1823     bracket (openBinaryFile f ReadMode) hClose $ \h -> do
 1824       filesz <- hFileSize h
 1825       let readsz = (fromIntegral filesz `max` 0) + 1
 1826       hGetContentsSizeHint h readsz (readsz `max` 255)
 1827       -- Our initial size is one bigger than the file size so that in the
 1828       -- typical case we will read the whole file in one go and not have
 1829       -- to allocate any more chunks. We'll still do the right thing if the
 1830       -- file size is 0 or is changed before we do the read.
 1831 
 1832 -- | Write a 'ByteString' to a file.
 1833 writeFile :: FilePath -> ByteString -> IO ()
 1834 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose
 1835     (\h -> hPut h txt)
 1836 
 1837 -- | Append a 'ByteString' to a file.
 1838 appendFile :: FilePath -> ByteString -> IO ()
 1839 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
 1840     (\h -> hPut h txt)
 1841 
 1842 -- ---------------------------------------------------------------------
 1843 -- Internal utilities
 1844 
 1845 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
 1846 -- of the string if no element is found, rather than Nothing.
 1847 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
 1848 findIndexOrEnd k (PS x s l) =
 1849     accursedUnutterablePerformIO $
 1850       withForeignPtr x $ \f ->
 1851         go (f `plusPtr` s) 0
 1852   where
 1853     go !ptr !n | n >= l    = return l
 1854                | otherwise = do w <- peek ptr
 1855                                 if k w
 1856                                   then return n
 1857                                   else go (ptr `plusPtr` 1) (n+1)
 1858 {-# INLINE findIndexOrEnd #-}
 1859 
 1860 -- Common up near identical calls to `error' to reduce the number
 1861 -- constant strings created when compiled:
 1862 errorEmptyList :: String -> a
 1863 errorEmptyList fun = moduleError fun "empty ByteString"
 1864 {-# NOINLINE errorEmptyList #-}
 1865 
 1866 moduleError :: String -> String -> a
 1867 moduleError fun msg = error (moduleErrorMsg fun msg)
 1868 {-# NOINLINE moduleError #-}
 1869 
 1870 moduleErrorIO :: String -> String -> IO a
 1871 moduleErrorIO fun msg = throwIO . userError $ moduleErrorMsg fun msg
 1872 {-# NOINLINE moduleErrorIO #-}
 1873 
 1874 moduleErrorMsg :: String -> String -> String
 1875 moduleErrorMsg fun msg = "Data.ByteString." ++ fun ++ ':':' ':msg
 1876 
 1877 -- Find from the end of the string using predicate
 1878 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
 1879 findFromEndUntil f ps@(PS x s l) =
 1880     if null ps then 0
 1881     else if f (unsafeLast ps) then l
 1882          else findFromEndUntil f (PS x s (l-1))